@rulebricks/cli 2.1.6 → 2.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114)
  1. package/README.md +75 -14
  2. package/cluster-setup/aws/README.md +123 -0
  3. package/cluster-setup/aws/check-aws-access.sh +242 -0
  4. package/cluster-setup/aws/parameters.json +13 -0
  5. package/cluster-setup/aws/rulebricks-cluster.cfn.yaml +355 -0
  6. package/cluster-setup/azure/README.md +141 -0
  7. package/cluster-setup/azure/check-aks-prereqs.sh +276 -0
  8. package/cluster-setup/azure/parameters.json +30 -0
  9. package/cluster-setup/azure/rulebricks-cluster.bicep +546 -0
  10. package/cluster-setup/gcp/README.md +189 -0
  11. package/cluster-setup/gcp/check-gke-prereqs.sh +260 -0
  12. package/dist/commands/backup.d.ts +5 -0
  13. package/dist/commands/backup.js +104 -0
  14. package/dist/commands/deploy.d.ts +3 -1
  15. package/dist/commands/deploy.js +226 -326
  16. package/dist/commands/destroy.d.ts +1 -1
  17. package/dist/commands/destroy.js +73 -123
  18. package/dist/commands/init.d.ts +5 -1
  19. package/dist/commands/init.js +78 -47
  20. package/dist/commands/list.d.ts +1 -0
  21. package/dist/commands/list.js +74 -0
  22. package/dist/commands/open.d.ts +1 -1
  23. package/dist/commands/open.js +4 -12
  24. package/dist/commands/redeploy.d.ts +6 -0
  25. package/dist/commands/redeploy.js +310 -0
  26. package/dist/commands/restore.d.ts +5 -0
  27. package/dist/commands/restore.js +338 -0
  28. package/dist/commands/status.js +62 -49
  29. package/dist/commands/upgrade.js +74 -51
  30. package/dist/components/DNSWaitScreen.d.ts +5 -1
  31. package/dist/components/DNSWaitScreen.js +47 -41
  32. package/dist/components/Wizard/WizardContext.d.ts +174 -29
  33. package/dist/components/Wizard/WizardContext.js +896 -91
  34. package/dist/components/Wizard/steps/CloudProviderStep.js +192 -102
  35. package/dist/components/Wizard/steps/DomainStep.js +5 -24
  36. package/dist/components/Wizard/steps/ExternalServicesStep.d.ts +6 -0
  37. package/dist/components/Wizard/steps/ExternalServicesStep.js +645 -0
  38. package/dist/components/Wizard/steps/FeatureConfigStep.d.ts +2 -1
  39. package/dist/components/Wizard/steps/FeatureConfigStep.js +959 -248
  40. package/dist/components/Wizard/steps/FeaturesStep.js +31 -35
  41. package/dist/components/Wizard/steps/ObservabilityStep.d.ts +6 -0
  42. package/dist/components/Wizard/steps/ObservabilityStep.js +137 -0
  43. package/dist/components/Wizard/steps/ReviewStep.d.ts +2 -1
  44. package/dist/components/Wizard/steps/ReviewStep.js +56 -7
  45. package/dist/components/Wizard/steps/StorageStep.d.ts +9 -0
  46. package/dist/components/Wizard/steps/StorageStep.js +592 -0
  47. package/dist/components/Wizard/steps/SupabaseCredentialsStep.js +20 -21
  48. package/dist/components/Wizard/steps/VersionStep.js +45 -23
  49. package/dist/components/Wizard/steps/index.d.ts +3 -3
  50. package/dist/components/Wizard/steps/index.js +3 -3
  51. package/dist/components/common/CommandApproval.d.ts +12 -0
  52. package/dist/components/common/CommandApproval.js +91 -0
  53. package/dist/components/common/DeploymentPicker.d.ts +14 -0
  54. package/dist/components/common/DeploymentPicker.js +16 -0
  55. package/dist/components/common/index.d.ts +2 -0
  56. package/dist/components/common/index.js +2 -0
  57. package/dist/index.js +94 -62
  58. package/dist/lib/cloudCli.d.ts +134 -63
  59. package/dist/lib/cloudCli.js +512 -220
  60. package/dist/lib/clusterSetupDefaults.d.ts +30 -0
  61. package/dist/lib/clusterSetupDefaults.js +64 -0
  62. package/dist/lib/commandApproval.d.ts +26 -0
  63. package/dist/lib/commandApproval.js +114 -0
  64. package/dist/lib/config.d.ts +12 -10
  65. package/dist/lib/config.js +91 -33
  66. package/dist/lib/configFixtures.d.ts +5 -0
  67. package/dist/lib/configFixtures.js +513 -0
  68. package/dist/lib/deploymentHealth.d.ts +32 -0
  69. package/dist/lib/deploymentHealth.js +157 -0
  70. package/dist/lib/dns.d.ts +1 -1
  71. package/dist/lib/dns.js +19 -1
  72. package/dist/lib/dns.test.d.ts +1 -0
  73. package/dist/lib/dns.test.js +27 -0
  74. package/dist/lib/dockerHub.d.ts +12 -1
  75. package/dist/lib/dockerHub.js +18 -8
  76. package/dist/lib/helm.d.ts +4 -0
  77. package/dist/lib/helm.js +16 -0
  78. package/dist/lib/helmValues.d.ts +25 -0
  79. package/dist/lib/helmValues.js +1937 -259
  80. package/dist/lib/helmValues.test.d.ts +1 -0
  81. package/dist/lib/helmValues.test.js +966 -0
  82. package/dist/lib/htpasswd.d.ts +1 -0
  83. package/dist/lib/htpasswd.js +15 -0
  84. package/dist/lib/kubernetes.d.ts +126 -13
  85. package/dist/lib/kubernetes.js +624 -134
  86. package/dist/lib/secrets.d.ts +23 -0
  87. package/dist/lib/secrets.js +158 -0
  88. package/dist/lib/validateValues.d.ts +31 -0
  89. package/dist/lib/validateValues.js +253 -0
  90. package/dist/lib/versions.d.ts +82 -11
  91. package/dist/lib/versions.js +131 -31
  92. package/dist/lib/versions.test.d.ts +1 -0
  93. package/dist/lib/versions.test.js +81 -0
  94. package/dist/lib/wizardSteps.d.ts +14 -0
  95. package/dist/lib/wizardSteps.js +23 -0
  96. package/dist/lib/workloadIdentity.d.ts +26 -0
  97. package/dist/lib/workloadIdentity.js +323 -0
  98. package/dist/lib/workloadIdentity.test.d.ts +1 -0
  99. package/dist/lib/workloadIdentity.test.js +57 -0
  100. package/dist/types/index.d.ts +2152 -95
  101. package/dist/types/index.js +554 -286
  102. package/package.json +10 -4
  103. package/schema/values.schema.json +1934 -0
  104. package/dist/components/Wizard/steps/CredentialsStep.d.ts +0 -6
  105. package/dist/components/Wizard/steps/CredentialsStep.js +0 -22
  106. package/dist/components/Wizard/steps/DeploymentModeStep.d.ts +0 -5
  107. package/dist/components/Wizard/steps/DeploymentModeStep.js +0 -26
  108. package/dist/components/Wizard/steps/TierStep.d.ts +0 -6
  109. package/dist/components/Wizard/steps/TierStep.js +0 -29
  110. package/dist/lib/terraform.d.ts +0 -66
  111. package/dist/lib/terraform.js +0 -754
  112. package/terraform/aws/main.tf +0 -355
  113. package/terraform/azure/main.tf +0 -371
  114. package/terraform/gcp/main.tf +0 -407
@@ -165,6 +165,150 @@ export async function getCurrentContext() {
165
165
  return null;
166
166
  }
167
167
  }
168
/**
 * Converts a Kubernetes CPU quantity string into a number of cores.
 *
 * Recognised suffixes: `n` (nano-cores), `u` (micro-cores) and `m`
 * (milli-cores); a bare number is taken as whole cores.
 *
 * @param {string} cpu - CPU quantity, e.g. "2", "1500m", "250000000n".
 * @returns {number} The quantity in cores, or 0 when the value cannot be
 *   parsed (so one malformed node does not turn a summed total into NaN).
 */
function parseCpuToCores(cpu) {
    const quantity = String(cpu ?? "").trim();
    // Divisor needed to bring each sub-core unit back to whole cores.
    const divisors = { n: 1_000_000_000, u: 1_000_000, m: 1_000 };
    const divisor = divisors[quantity.slice(-1)];
    const cores = divisor === undefined
        ? Number(quantity)
        : Number(quantity.slice(0, -1)) / divisor;
    return Number.isFinite(cores) ? cores : 0;
}
177
/**
 * Converts a Kubernetes memory/storage quantity string into gibibytes (Gi).
 *
 * Supports the binary suffixes (Ki, Mi, Gi, Ti, Pi, Ei), the decimal suffixes
 * (k, M, G, T, P, E), the milli suffix (m) and a bare number of bytes.
 * Uppercase `K` is kept as an alias for `k` for backward compatibility.
 *
 * @param {string} memory - Quantity such as "16Gi", "2048Mi", "500k" or "1073741824".
 * @returns {number} The quantity expressed in Gi, or 0 when it cannot be parsed.
 */
function parseMemoryToGi(memory) {
    const match = String(memory ?? "")
        .trim()
        .match(/^(\d+(?:\.\d+)?)(Ki|Mi|Gi|Ti|Pi|Ei|[mkKMGTPE])?$/);
    if (!match)
        return 0;
    const bytesPerGi = 1024 ** 3;
    // Every unit the regex can capture has an entry here, so no suffix silently
    // falls back to being interpreted as plain bytes.
    const multipliers = {
        "": 1 / bytesPerGi,
        m: 1 / (1000 * bytesPerGi),
        Ki: 1 / 1024 / 1024,
        Mi: 1 / 1024,
        Gi: 1,
        Ti: 1024,
        Pi: 1024 * 1024,
        Ei: 1024 ** 3,
        k: 1000 / bytesPerGi,
        K: 1000 / bytesPerGi,
        M: 1000 ** 2 / bytesPerGi,
        G: 1000 ** 3 / bytesPerGi,
        T: 1000 ** 4 / bytesPerGi,
        P: 1000 ** 5 / bytesPerGi,
        E: 1000 ** 6 / bytesPerGi,
    };
    return Number(match[1]) * multipliers[match[2] ?? ""];
}
197
/**
 * Rounds a measured capacity up to the next whole unit.
 *
 * @param {number} value - Measured capacity (cores or Gi).
 * @returns {number} The smallest integer greater than or equal to `value`.
 */
function roundUpForEligibility(value) {
    const roundedUp = Math.ceil(value);
    return roundedUp;
}
200
/**
 * Maps a reported node architecture onto the canonical "amd64" / "arm64" names.
 *
 * @param {string|undefined} architecture - Architecture string as reported for a node.
 * @returns {"amd64"|"arm64"|null} Canonical name, or null for anything unrecognised.
 */
function normalizeNodeArchitecture(architecture) {
    switch (architecture) {
        case "amd64":
        case "x86_64":
            return "amd64";
        case "arm64":
        case "aarch64":
            return "arm64";
        default:
            return null;
    }
}
207
/**
 * Collapses the set of architectures seen across nodes into a single label.
 *
 * @param {Set<string>} architectures - Canonical architecture names seen on the nodes.
 * @returns {"unknown"|"mixed"|"arm64"|"amd64"} "unknown" for an empty set,
 *   "mixed" for more than one entry, otherwise the single architecture.
 */
function summarizeNodeArchitecture(architectures) {
    const count = architectures.size;
    if (count !== 0 && !(count > 1)) {
        // Exactly one architecture present: anything that is not arm64 reports as amd64.
        return architectures.has("arm64") ? "arm64" : "amd64";
    }
    return count === 0 ? "unknown" : "mixed";
}
214
/**
 * Lists the cluster's StorageClasses by running
 * `kubectl get storageclass -o json` (15s timeout).
 *
 * @returns {Promise<Array<{name: string, provisioner: string, isDefault: boolean,
 *   volumeBindingMode: *, allowVolumeExpansion: *}>>} One summary per named
 *   StorageClass; an empty array when the command or JSON parsing fails.
 */
async function getStorageClasses() {
    // Either annotation set to "true" marks a class as the default.
    const defaultClassAnnotations = [
        "storageclass.kubernetes.io/is-default-class",
        "storageclass.beta.kubernetes.io/is-default-class",
    ];
    try {
        const result = await execa("kubectl", ["get", "storageclass", "-o", "json"], { timeout: 15000 });
        const { items } = JSON.parse(result.stdout);
        const summaries = [];
        for (const entry of items ?? []) {
            const annotations = entry.metadata?.annotations ?? {};
            const summary = {
                name: entry.metadata?.name || "",
                provisioner: entry.provisioner || "",
                isDefault: defaultClassAnnotations.some((key) => annotations[key] === "true"),
                volumeBindingMode: entry.volumeBindingMode,
                allowVolumeExpansion: entry.allowVolumeExpansion,
            };
            // Entries without a name are dropped.
            if (summary.name) {
                summaries.push(summary);
            }
        }
        return summaries;
    }
    catch {
        return [];
    }
}
238
/**
 * Sums the capacity reported by CSIStorageCapacity objects (all namespaces)
 * that belong to the given storage class.
 *
 * @param {string|undefined} storageClassName - Storage class to total up.
 * @returns {Promise<number|undefined>} Total capacity in Gi, or undefined when
 *   no class name is given, no positive capacity is reported, or the lookup fails.
 */
async function getPersistentStorageCapacityGi(storageClassName) {
    if (!storageClassName)
        return undefined;
    try {
        const { stdout } = await execa("kubectl", ["get", "csistoragecapacity", "-A", "-o", "json"], { timeout: 15000 });
        const { items } = JSON.parse(stdout);
        let totalGi = 0;
        let positiveEntries = 0;
        for (const entry of items ?? []) {
            if (entry.storageClassName !== storageClassName)
                continue;
            const entryGi = parseMemoryToGi(entry.capacity || "0");
            // Zero or unparsable capacities do not count as reported capacity.
            if (entryGi > 0) {
                totalGi += entryGi;
                positiveEntries += 1;
            }
        }
        return positiveEntries === 0 ? undefined : totalGi;
    }
    catch {
        return undefined;
    }
}
256
/**
 * Inspects the current cluster via kubectl and reports node architecture,
 * allocatable CPU/memory across schedulable nodes, and the available storage
 * classes. The CLI uses this to keep Helm values compatible with the
 * Kubernetes resources the user has already made available (storage class, ARM
 * tolerations, etc.); workload sizing itself follows the chart defaults.
 *
 * @returns {Promise<object|null>} Capability summary, or null when the node
 *   listing (or anything derived from it) fails.
 */
export async function inferClusterCapabilities() {
    // An arm64 node carrying this taint means workloads need a matching toleration.
    const isArm64NoScheduleTaint = (taint) => taint.key === "kubernetes.io/arch" &&
        taint.value === "arm64" &&
        taint.effect === "NoSchedule";
    try {
        const nodesResult = await execa("kubectl", ["get", "nodes", "-o", "json"], {
            timeout: 15000,
        });
        const nodeList = JSON.parse(nodesResult.stdout);
        // Cordoned nodes are excluded from every total below.
        const schedulableNodes = (nodeList.items ?? []).filter((node) => !node.spec?.unschedulable);
        const architectures = new Set();
        let cpuCores = 0;
        let memoryGi = 0;
        let arm64TolerationRequired = false;
        for (const { status, metadata, spec } of schedulableNodes) {
            const allocatable = status?.allocatable;
            cpuCores += parseCpuToCores(allocatable?.cpu || "0");
            memoryGi += parseMemoryToGi(allocatable?.memory || "0");
            // Prefer the kubelet-reported architecture, then the arch labels.
            const labels = metadata?.labels;
            const architecture = normalizeNodeArchitecture(status?.nodeInfo?.architecture ||
                labels?.["kubernetes.io/arch"] ||
                labels?.["beta.kubernetes.io/arch"]);
            if (!architecture)
                continue;
            architectures.add(architecture);
            if (architecture === "arm64" && spec?.taints?.some(isArm64NoScheduleTaint)) {
                arm64TolerationRequired = true;
            }
        }
        const storageClasses = await getStorageClasses();
        // Fall back to the first listed class when none is marked default.
        const defaultStorageClass = storageClasses.find((candidate) => candidate.isDefault) ?? storageClasses[0];
        const totalPersistentStorageGi = await getPersistentStorageCapacityGi(defaultStorageClass?.name);
        return {
            nodeArchitecture: summarizeNodeArchitecture(architectures),
            arm64TolerationRequired,
            schedulableNodeCount: schedulableNodes.length,
            totalCpuCores: cpuCores,
            totalMemoryGi: memoryGi,
            eligibleCpuCores: roundUpForEligibility(cpuCores),
            eligibleMemoryGi: roundUpForEligibility(memoryGi),
            totalPersistentStorageGi,
            storageClasses,
            defaultStorageClass,
            storageClass: defaultStorageClass?.name,
            storageProvisioner: defaultStorageClass?.provisioner,
        };
    }
    catch {
        return null;
    }
}
168
312
  /**
169
313
  * Gets pod status for the Rulebricks namespace
170
314
  */
@@ -378,6 +522,203 @@ export async function streamLogs(podName, namespace = DEFAULT_NAMESPACE, options
378
522
  }
379
523
  await execa("kubectl", args, { stdio: "inherit" });
380
524
  }
525
/**
 * Runs a command inside a pod with `kubectl exec` and returns its stdout.
 *
 * @param {string} namespace - Namespace of the pod.
 * @param {string} podName - Pod to exec into.
 * @param {string|undefined} container - Optional container name (`-c`).
 * @param {string[]} args - Command and arguments placed after `--`.
 * @returns {Promise<string>} Captured stdout of the command.
 * @throws {Error} When kubectl exits unsuccessfully.
 */
export async function execInPod(namespace, podName, container, args) {
    const containerFlag = container ? ["-c", container] : [];
    const kubectlArgs = ["exec", "-n", namespace, podName, ...containerFlag, "--", ...args];
    try {
        const result = await execa("kubectl", kubectlArgs);
        return result.stdout;
    }
    catch (error) {
        throw new Error(`Failed to exec into pod ${podName}:\n${getErrorMessage(error)}`);
    }
}
539
/**
 * Creates a one-off batch/v1 Job, waits for it to complete and returns its logs.
 *
 * Any existing Job with the same name is deleted first, the manifest is
 * applied through `kubectl apply -f -`, and `kubectl wait` blocks until the
 * Job reports the `complete` condition or the timeout elapses.
 *
 * @param {object} options - Job description: `name`, `namespace`,
 *   `serviceAccountName`, `image`, `command`, plus optional `env`,
 *   `volumeMounts`, `volumes`, `initContainers`, `labels`, `backoffLimit`
 *   (default 0) and `timeoutSeconds` (default 3600).
 * @returns {Promise<{jobName: string, logs: string}>} Job name and its logs.
 * @throws {Error} When the Job fails or does not complete; the message carries
 *   the Job logs when available, otherwise the underlying error text.
 */
export async function runEphemeralJob(options) {
    const { name, namespace, serviceAccountName, image, command, env = [], volumeMounts = [], volumes = [], initContainers = [], labels = {}, backoffLimit = 0, timeoutSeconds = 3600, } = options;
    const jobContainer = {
        name: "job",
        image,
        imagePullPolicy: "IfNotPresent",
        command,
        env,
        volumeMounts,
    };
    const podSpec = {
        restartPolicy: "Never",
        serviceAccountName,
        containers: [jobContainer],
        volumes,
        // initContainers is only emitted when there is at least one.
        ...(initContainers.length > 0 ? { initContainers } : {}),
    };
    const manifest = {
        apiVersion: "batch/v1",
        kind: "Job",
        metadata: { name, namespace, labels },
        spec: {
            backoffLimit,
            template: {
                metadata: { labels },
                spec: podSpec,
            },
        },
    };
    const deleteArgs = ["delete", "job", name, "-n", namespace, "--ignore-not-found=true"];
    const waitArgs = [
        "wait",
        "--for=condition=complete",
        `job/${name}`,
        "-n",
        namespace,
        `--timeout=${timeoutSeconds}s`,
    ];
    try {
        await execa("kubectl", deleteArgs);
        await execa("kubectl", ["apply", "-f", "-"], {
            input: JSON.stringify(manifest),
        });
        await execa("kubectl", waitArgs);
        return { jobName: name, logs: await getJobLogs(name, namespace) };
    }
    catch (error) {
        // Diagnostics are best-effort: a failure to fetch them must not mask the original error.
        const logs = await getJobLogs(name, namespace).catch(() => "");
        const failed = await isJobFailed(name, namespace).catch(() => false);
        const detail = logs || getErrorMessage(error);
        throw new Error(failed
            ? `Job ${name} failed:\n${detail}`
            : `Job ${name} did not complete:\n${detail}`);
    }
}
609
/**
 * Creates a Job from an existing CronJob's template
 * (`kubectl create job --from=cronjob/<name>`), deleting any Job that
 * already uses the requested name first.
 *
 * @param {string} namespace - Namespace holding the CronJob.
 * @param {string} cronJobName - CronJob to instantiate.
 * @param {string} jobName - Name for the new Job.
 * @returns {Promise<void>}
 * @throws {Error} When either kubectl call fails.
 */
export async function createJobFromCronJob(namespace, cronJobName, jobName) {
    const scope = ["-n", namespace];
    const steps = [
        ["delete", "job", jobName, ...scope, "--ignore-not-found=true"],
        ["create", "job", jobName, ...scope, `--from=cronjob/${cronJobName}`],
    ];
    try {
        for (const kubectlArgs of steps) {
            await execa("kubectl", kubectlArgs);
        }
    }
    catch (error) {
        throw new Error(`Failed to create backup job:\n${getErrorMessage(error)}`);
    }
}
632
/**
 * Blocks until a Job reports the `complete` condition, then returns its logs.
 *
 * @param {string} namespace - Namespace of the Job.
 * @param {string} jobName - Job to wait for.
 * @param {number} [timeoutSeconds=3600] - Maximum time passed to `kubectl wait`.
 * @returns {Promise<string>} Logs from all containers of the Job.
 * @throws {Error} "Job … failed" when the Job has failed pods, otherwise
 *   "Timed out waiting for job …"; both include logs or the kubectl error.
 */
export async function waitForJobComplete(namespace, jobName, timeoutSeconds = 3600) {
    const waitArgs = [
        "wait",
        "--for=condition=complete",
        `job/${jobName}`,
        "-n",
        namespace,
        `--timeout=${timeoutSeconds}s`,
    ];
    try {
        await execa("kubectl", waitArgs);
        const logs = await getJobLogs(jobName, namespace);
        return logs;
    }
    catch (error) {
        // Diagnostics are best-effort so they never hide the original failure.
        const logs = await getJobLogs(jobName, namespace).catch(() => "");
        const failed = await isJobFailed(jobName, namespace).catch(() => false);
        const detail = logs || getErrorMessage(error);
        throw new Error(failed
            ? `Job ${jobName} failed:\n${detail}`
            : `Timed out waiting for job ${jobName}:\n${detail}`);
    }
}
653
/**
 * Fetches the logs of every container belonging to a Job.
 *
 * @param {string} jobName - Job whose logs are wanted.
 * @param {string} namespace - Namespace of the Job.
 * @returns {Promise<string>} Raw stdout of `kubectl logs job/<name> --all-containers=true`.
 */
export async function getJobLogs(jobName, namespace) {
    const kubectlArgs = ["logs", `job/${jobName}`, "-n", namespace, "--all-containers=true"];
    const result = await execa("kubectl", kubectlArgs);
    return result.stdout;
}
663
/**
 * Reports whether a Job has at least one failed pod, read from `.status.failed`.
 *
 * @param {string} jobName - Job to inspect.
 * @param {string} namespace - Namespace of the Job.
 * @returns {Promise<boolean>} True when the failed count is greater than zero.
 */
async function isJobFailed(jobName, namespace) {
    const kubectlArgs = ["get", "job", jobName, "-n", namespace, "-o", "jsonpath={.status.failed}"];
    const result = await execa("kubectl", kubectlArgs);
    // An empty jsonpath result means the field is unset, i.e. no failures.
    const failedCount = Number.parseInt(result.stdout || "0", 10);
    return failedCount > 0;
}
675
/**
 * Sets the replica count of a Deployment with `kubectl scale`.
 *
 * @param {string} namespace - Namespace of the Deployment.
 * @param {string} name - Deployment name.
 * @param {number} replicas - Desired replica count.
 * @returns {Promise<void>}
 * @throws {Error} When kubectl exits unsuccessfully.
 */
export async function scaleDeployment(namespace, name, replicas) {
    const kubectlArgs = ["scale", "deployment", name, "-n", namespace, `--replicas=${replicas}`];
    try {
        await execa("kubectl", kubectlArgs);
    }
    catch (error) {
        throw new Error(`Failed to scale deployment ${name}:\n${getErrorMessage(error)}`);
    }
}
690
/**
 * Waits for a Deployment rollout to finish using `kubectl rollout status`.
 *
 * @param {string} namespace - Namespace of the Deployment.
 * @param {string} name - Deployment name.
 * @param {number} [timeoutSeconds=600] - Timeout handed to kubectl.
 * @returns {Promise<void>}
 * @throws {Error} When the rollout does not finish successfully.
 */
export async function waitForDeploymentReady(namespace, name, timeoutSeconds = 600) {
    const kubectlArgs = [
        "rollout",
        "status",
        `deployment/${name}`,
        "-n",
        namespace,
        `--timeout=${timeoutSeconds}s`,
    ];
    try {
        await execa("kubectl", kubectlArgs);
    }
    catch (error) {
        throw new Error(`Deployment ${name} is not ready:\n${getErrorMessage(error)}`);
    }
}
705
/**
 * Reads the desired replica count (`.spec.replicas`) of a Deployment.
 *
 * @param {string} namespace - Namespace of the Deployment.
 * @param {string} name - Deployment name.
 * @returns {Promise<number|null>} The replica count (0 when the field is
 *   empty), or null when the lookup fails.
 */
export async function getDeploymentReplicas(namespace, name) {
    const kubectlArgs = ["get", "deployment", name, "-n", namespace, "-o", "jsonpath={.spec.replicas}"];
    try {
        const result = await execa("kubectl", kubectlArgs);
        return Number.parseInt(result.stdout || "0", 10);
    }
    catch {
        return null;
    }
}
381
722
  /**
382
723
  * Colors for multi-pod log prefixes
383
724
  */
@@ -623,45 +964,111 @@ export async function deletePVCs(namespace, options = {}) {
623
964
  }
624
965
  }
625
966
  }
967
// Custom resources whose operator sets a finalizer that only that operator can
// clear. When the operator is uninstalled with the release, those finalizers are
// never removed and wedge the namespace (and the CRD) in Terminating forever.
// Observed blockers: KEDA ScaledObjects, cert-manager ACME Challenges/Orders, and
// Strimzi Kafka resources.
// Frozen so this shared, order-sensitive list cannot be mutated at runtime.
const FINALIZER_BLOCKING_CR_TYPES = Object.freeze([
    // KEDA
    "scaledobjects.keda.sh",
    "scaledjobs.keda.sh",
    // cert-manager
    "challenges.acme.cert-manager.io",
    "orders.acme.cert-manager.io",
    "certificaterequests.cert-manager.io",
    "certificates.cert-manager.io",
    // Strimzi
    "kafkatopics.kafka.strimzi.io",
    "kafkausers.kafka.strimzi.io",
    "kafkanodepools.kafka.strimzi.io",
    "kafkas.kafka.strimzi.io",
]);
626
984
  /**
627
- * Removes finalizers from KEDA ScaledObjects to prevent namespace deletion from hanging.
628
- * KEDA finalizers wait for the KEDA controller to clean up, but if KEDA is being deleted
629
- * with the namespace, this causes a deadlock.
985
+ * Strips finalizers from the custom resources whose controllers are torn down
986
+ * with the release, so the namespace can finalize instead of hanging in
987
+ Terminating (NamespaceFinalizersRemaining). Best-effort per type — a missing
988
+ * CRD (feature disabled) or already-gone object is fine.
630
989
  */
631
- export async function removeKedaFinalizers(namespace) {
632
- try {
633
- // Get all ScaledObjects in the namespace
634
- const { stdout } = await execa("kubectl", [
635
- "get",
636
- "scaledobjects.keda.sh",
637
- "-n",
638
- namespace,
639
- "-o",
640
- "jsonpath={.items[*].metadata.name}",
641
- ], { timeout: 15000 });
642
- const scaledObjects = stdout.split(" ").filter(Boolean);
643
- // Patch each ScaledObject to remove finalizers
644
- for (const name of scaledObjects) {
645
- try {
646
- await execa("kubectl", [
647
- "patch",
648
- "scaledobject",
649
- name,
650
- "-n",
651
- namespace,
652
- "-p",
653
- '{"metadata":{"finalizers":null}}',
654
- "--type=merge",
655
- ], { timeout: 15000 });
990
+ export async function removeBlockingFinalizers(namespace) {
991
+ for (const resourceType of FINALIZER_BLOCKING_CR_TYPES) {
992
+ try {
993
+ const { stdout } = await execa("kubectl", [
994
+ "get",
995
+ resourceType,
996
+ "-n",
997
+ namespace,
998
+ "-o",
999
+ "jsonpath={.items[*].metadata.name}",
1000
+ ], { timeout: 15000 });
1001
+ const names = stdout.split(" ").filter(Boolean);
1002
+ for (const name of names) {
1003
+ try {
1004
+ await execa("kubectl", [
1005
+ "patch",
1006
+ resourceType,
1007
+ name,
1008
+ "-n",
1009
+ namespace,
1010
+ "-p",
1011
+ '{"metadata":{"finalizers":null}}',
1012
+ "--type=merge",
1013
+ ], { timeout: 15000 });
1014
+ }
1015
+ catch {
1016
+ // Ignore — object might already be deleted.
1017
+ }
656
1018
  }
657
- catch {
658
- // Ignore errors - object might already be deleted
1019
+ }
1020
+ catch {
1021
+ // Ignore — this CRD might not be installed (feature disabled).
1022
+ }
1023
+ }
1024
+ }
1025
+ /**
1026
+ * Deletes aggregated APIServices (apiregistration.k8s.io) whose backing service
1027
+ * lives in the given namespace.
1028
+ *
1029
+ * Why this matters for teardown: an aggregated API (e.g. KEDA's
1030
+ * v1beta1.external.metrics.k8s.io, prometheus-adapter's custom.metrics.k8s.io,
1031
+ * etc.) is served by an in-namespace Service. When the namespace is torn down
1032
+ * that Service disappears and the (cluster-scoped) APIService goes Unavailable
1033
+ * with ServiceNotFound. The namespace controller must enumerate every API group
1034
+ * to delete a namespace's contents, so a single broken APIService makes its
1035
+ * discovery step fail and wedges the namespace in Terminating forever
1036
+ * (NamespaceDeletionDiscoveryFailure) - which then rejects any reinstall into
1037
+ * that namespace ("being terminated").
1038
+ *
1039
+ * Deleting these APIServices up front (they are going away with the namespace
1040
+ * anyway) keeps discovery healthy so the namespace can finalize. This is
1041
+ * generalized to ALL APIServices backed by the target namespace, not just KEDA,
1042
+ * and is safe: cluster APIs backed by other namespaces (e.g. metrics-server in
1043
+ * kube-system) are never matched. Listing APIService objects is served directly
1044
+ * by kube-apiserver, so this also works to rescue an already-stuck namespace.
1045
+ *
1046
+ * Returns the names of the APIServices that were deleted.
1047
+ */
1048
+ export async function cleanupNamespaceAPIServices(namespace) {
1049
+ const deleted = [];
1050
+ try {
1051
+ const { stdout } = await execa("kubectl", ["get", "apiservices", "-o", "json"], { timeout: 30000 });
1052
+ const parsed = JSON.parse(stdout);
1053
+ for (const item of parsed.items ?? []) {
1054
+ const name = item.metadata?.name;
1055
+ if (!name)
1056
+ continue;
1057
+ if (item.spec?.service?.namespace === namespace) {
1058
+ try {
1059
+ await execa("kubectl", ["delete", "apiservice", name, "--ignore-not-found"], { timeout: 30000 });
1060
+ deleted.push(name);
1061
+ }
1062
+ catch {
1063
+ // Best-effort: a single failure should not block teardown.
1064
+ }
659
1065
  }
660
1066
  }
661
1067
  }
662
1068
  catch {
663
- // Ignore errors - KEDA CRDs might not be installed
1069
+ // Best-effort: if APIServices can't be listed, don't block the destroy.
664
1070
  }
1071
+ return deleted;
665
1072
  }
666
1073
  /**
667
1074
  * Checks if a namespace exists
@@ -676,98 +1083,144 @@ export async function namespaceExists(namespace) {
676
1083
  }
677
1084
  }
678
1085
  /**
679
- * Waits for cluster to be accessible with retries.
680
- * EKS IAM authentication can take time to propagate after cluster creation.
1086
+ * Removes this release's leftovers in the kube-system namespace. The
1087
+ * kube-prometheus-stack prometheus-operator creates a "<release>-...-kubelet"
1088
+ * Service there at runtime (via its --kubelet-service flag); it lives OUTSIDE the
1089
+ * release namespace and is operator-created (not chart-templated), so
1090
+ * `helm uninstall` never deletes it and one accumulates per deployment. Also
1091
+ * sweeps any helm-labeled kube-system objects (exporter Services/Endpoints) a
1092
+ * partial uninstall may have stranded. Scoped strictly to this release; matched
1093
+ * by the release-name prefix so a coexisting deployment's kubelet Service is
1094
+ * never touched. Best-effort — never blocks teardown.
681
1095
  */
682
- export async function waitForClusterAccess(maxRetries = 30, delayMs = 10000) {
683
- for (let attempt = 1; attempt <= maxRetries; attempt++) {
684
- try {
685
- await execa("kubectl", ["cluster-info"]);
686
- return; // Success
687
- }
688
- catch (error) {
689
- if (attempt === maxRetries) {
690
- throw new Error(`Cluster not accessible after ${maxRetries} attempts. ` +
691
- `EKS IAM authentication may not have propagated yet. ` +
692
- `Please wait a few minutes and try again.\n${getErrorMessage(error)}`);
1096
+ export async function cleanupKubeSystemLeftovers(releaseName) {
1097
+ // 1) helm-labeled kube-system objects from this release (only present if a
1098
+ // prior uninstall didn't finish): the kube-prometheus-stack exporter
1099
+ // Services (coredns/kube-controller-manager/etc.) and their Endpoints.
1100
+ try {
1101
+ await execa("kubectl", [
1102
+ "delete",
1103
+ "service,endpoints",
1104
+ "-n",
1105
+ "kube-system",
1106
+ "-l",
1107
+ `release=${releaseName}`,
1108
+ "--ignore-not-found",
1109
+ ], { timeout: 30000 });
1110
+ }
1111
+ catch {
1112
+ // best-effort
1113
+ }
1114
+ // 2) the operator-created kubelet Service, matched by name (it carries no
1115
+ // reliable per-release label). Name is "<release>-<kube-prometheus>-kubelet"
1116
+ // (the middle segment is truncated by the helm fullname template). The
1117
+ // trailing "-" in the prefix guard prevents matching a sibling whose name
1118
+ // is a prefix of this one (e.g. az-p0 vs az-p055).
1119
+ try {
1120
+ const { stdout } = await execa("kubectl", [
1121
+ "get",
1122
+ "service",
1123
+ "-n",
1124
+ "kube-system",
1125
+ "-o",
1126
+ "jsonpath={.items[*].metadata.name}",
1127
+ ], { timeout: 15000 });
1128
+ const targets = stdout
1129
+ .split(" ")
1130
+ .filter(Boolean)
1131
+ .filter((n) => n.startsWith(`${releaseName}-`) && n.endsWith("-kubelet"));
1132
+ for (const name of targets) {
1133
+ try {
1134
+ await execa("kubectl", ["delete", "service", name, "-n", "kube-system", "--ignore-not-found"], { timeout: 30000 });
1135
+ }
1136
+ catch {
1137
+ // best-effort
693
1138
  }
694
- // Wait before next retry
695
- await sleep(delayMs);
696
1139
  }
697
1140
  }
1141
+ catch {
1142
+ // best-effort
1143
+ }
698
1144
  }
699
1145
  /**
700
- * Creates default StorageClass for the cloud provider.
701
- * Should be called after kubeconfig is configured and cluster is accessible.
702
- */
703
- export async function createDefaultStorageClass(provider) {
704
- // First wait for cluster to be accessible
705
- await waitForClusterAccess();
706
- let storageClassYaml;
707
- switch (provider) {
708
- case "aws":
709
- storageClassYaml = `
710
- apiVersion: storage.k8s.io/v1
711
- kind: StorageClass
712
- metadata:
713
- name: gp3
714
- annotations:
715
- storageclass.kubernetes.io/is-default-class: "true"
716
- provisioner: ebs.csi.aws.com
717
- reclaimPolicy: Delete
718
- volumeBindingMode: WaitForFirstConsumer
719
- parameters:
720
- type: gp3
721
- encrypted: "true"
722
- `;
723
- break;
724
- case "gcp":
725
- storageClassYaml = `
726
- apiVersion: storage.k8s.io/v1
727
- kind: StorageClass
728
- metadata:
729
- name: pd-ssd
730
- annotations:
731
- storageclass.kubernetes.io/is-default-class: "true"
732
- provisioner: pd.csi.storage.gke.io
733
- reclaimPolicy: Delete
734
- volumeBindingMode: WaitForFirstConsumer
735
- parameters:
736
- type: pd-ssd
737
- `;
738
- break;
739
- case "azure":
740
- storageClassYaml = `
741
- apiVersion: storage.k8s.io/v1
742
- kind: StorageClass
743
- metadata:
744
- name: managed-premium
745
- annotations:
746
- storageclass.kubernetes.io/is-default-class: "true"
747
- provisioner: disk.csi.azure.com
748
- reclaimPolicy: Delete
749
- volumeBindingMode: WaitForFirstConsumer
750
- parameters:
751
- skuName: Premium_LRS
752
- `;
753
- break;
754
- default:
755
- throw new Error(`Unsupported cloud provider: ${provider}`);
756
- }
1146
+ * True only when no OTHER Rulebricks deployment remains on the cluster (besides
1147
+ * `releaseName`). Gates deletion of cluster-SHARED resources (CRDs) so tearing
1148
+ * down one deployment never cascade-deletes another deployment's custom
1149
+ * resources. Deployments are named `rulebricks-<name>` for both the namespace and
1150
+ * the helm release (see getNamespace/getReleaseName), so the "rulebricks-" prefix
1151
+ * is a sound cluster-side signal. Fails CLOSED (returns false) if the cluster
1152
+ * can't be enumerated — we never purge shared resources on uncertainty.
1153
+ */
1154
+ export async function isLastRulebricksDeployment(releaseName) {
757
1155
  try {
758
- await execa("kubectl", ["apply", "-f", "-"], {
759
- input: storageClassYaml,
1156
+ // Authoritative: helm releases cluster-wide.
1157
+ const { stdout } = await execa("helm", ["list", "-A", "-o", "json"], {
1158
+ timeout: 30000,
760
1159
  });
1160
+ const releases = JSON.parse(stdout);
1161
+ const otherReleases = releases.filter((r) => typeof r.name === "string" &&
1162
+ r.name.startsWith("rulebricks-") &&
1163
+ r.name !== releaseName);
1164
+ if (otherReleases.length > 0)
1165
+ return false;
1166
+ // Cross-check namespaces in case a release secret is gone but the ns lingers
1167
+ // (namespace name == release name by convention).
1168
+ const { stdout: nsOut } = await execa("kubectl", ["get", "namespaces", "-o", "jsonpath={.items[*].metadata.name}"], { timeout: 15000 });
1169
+ const otherNamespaces = nsOut
1170
+ .split(" ")
1171
+ .filter(Boolean)
1172
+ .filter((n) => n.startsWith("rulebricks-") && n !== releaseName);
1173
+ return otherNamespaces.length === 0;
761
1174
  }
762
- catch (error) {
763
- throw new Error(`Failed to create StorageClass:\n${getErrorMessage(error)}`);
1175
+ catch {
1176
+ return false; // fail closed — do not purge shared resources on uncertainty
1177
+ }
1178
+ }
1179
// CRD API-group suffixes the chart ships in crds/ dirs (cert-manager + keda from
// the parent crds/, strimzi + kube-prometheus-stack from subchart crds/). helm
// NEVER deletes crds/ contents on uninstall, so they leak and accumulate.
//
// CRD names have the form "<plural>.<group>", so every suffix starts with "."
// to anchor the match on a full DNS label. Without the dot, a foreign group
// that merely ends in the same characters (e.g. "x.notcert-manager.io") would
// be matched by endsWith() and its CRDs deleted cluster-wide.
const RULEBRICKS_CRD_GROUP_SUFFIXES = [
    ".strimzi.io", // kafka.strimzi.io, core.strimzi.io
    ".cert-manager.io", // cert-manager.io, acme.cert-manager.io
    ".keda.sh", // keda.sh, eventing.keda.sh
    ".monitoring.coreos.com", // kube-prometheus-stack
];
1188
/**
 * Deletes the cluster-scoped CRDs whose names end with one of
 * RULEBRICKS_CRD_GROUP_SUFFIXES (cert-manager, keda, strimzi,
 * kube-prometheus-stack). CLUSTER-SHARED: deleting a CRD cascade-deletes every
 * custom resource of that kind across ALL namespaces, so callers MUST gate this
 * on isLastRulebricksDeployment() (or an explicit operator --purge) — never
 * call it while another Rulebricks deployment exists.
 *
 * Best-effort and non-blocking: listing or deletion failures are swallowed and
 * deletes are issued with `--wait=false`.
 *
 * @returns {Promise<string[]>} Names of the CRDs whose delete command succeeded.
 */
export async function deleteRulebricksCRDs() {
    const removed = [];
    const isChartCrd = (crdName) => RULEBRICKS_CRD_GROUP_SUFFIXES.some((suffix) => crdName.endsWith(suffix));
    let candidates;
    try {
        const listing = await execa("kubectl", ["get", "crd", "-o", "jsonpath={.items[*].metadata.name}"], { timeout: 30000 });
        candidates = listing.stdout
            .split(" ")
            .filter((crdName) => crdName && isChartCrd(crdName));
    }
    catch {
        // best-effort: if CRDs can't be listed, don't block the destroy
        return removed;
    }
    for (const crdName of candidates) {
        try {
            await execa("kubectl", ["delete", "crd", crdName, "--ignore-not-found", "--wait=false"], { timeout: 30000 });
            removed.push(crdName);
        }
        catch {
            // best-effort: a single CRD failure should not block teardown
        }
    }
    return removed;
}
766
1219
  /**
767
1220
  * Extracts the version tag from a Docker image string.
768
1221
  * E.g., "rulebricks/rulebricks:v1.5.8" -> "v1.5.8"
769
1222
  */
770
- function extractImageTag(image) {
1223
+ export function extractImageTag(image) {
771
1224
  if (!image)
772
1225
  return null;
773
1226
  const parts = image.split(":");
@@ -775,51 +1228,88 @@ function extractImageTag(image) {
775
1228
  return null;
776
1229
  return parts[parts.length - 1];
777
1230
  }
778
- /**
779
- * Gets the actual deployed image versions from Kubernetes deployments.
780
- * Queries the app and HPS deployments to get their current image tags.
781
- *
782
- * @param releaseName - The Helm release name (e.g., "rulebricks")
783
- * @param namespace - The Kubernetes namespace
784
- * @returns DeployedVersions with app and HPS versions, or null if not found
785
- */
786
- export async function getDeployedImageVersions(releaseName, namespace) {
787
- const result = {
788
- appVersion: null,
789
- hpsVersion: null,
790
- };
791
- // Get app deployment image
1231
/**
 * Extracts the sha256 digest from a container image ID string.
 * E.g., "registry/repo@sha256:abc..." -> "sha256:abc..."
 *
 * Takes the text after the last "@" (or the whole string when there is no "@")
 * and accepts it only if it starts with "sha256:".
 *
 * @param imageId - Image ID as reported by the container runtime
 * @returns The "sha256:..." digest, or null when the input is missing, not a
 *          string, or does not carry a sha256 digest
 */
export function extractImageDigest(imageId) {
    // Mirror extractImageTag(): missing input yields null instead of throwing.
    if (typeof imageId !== "string" || imageId === "") {
        return null;
    }
    const digest = imageId.split("@").pop();
    return digest?.startsWith("sha256:") ? digest : null;
}
1235
/**
 * Reads the image reference of the first container in a workload's pod
 * template via `kubectl get <workloadType> <name>`.
 *
 * @param workloadType - kubectl resource type (e.g., "deployment", "statefulset")
 * @param name - Workload name
 * @param namespace - The Kubernetes namespace
 * @returns The trimmed image string, or null when the output is empty or the
 *          kubectl call fails or times out
 */
async function getWorkloadImage(workloadType, name, namespace) {
    try {
        const { stdout } = await execa("kubectl", [
            "get",
            workloadType,
            name,
            "-n",
            namespace,
            "-o",
            "jsonpath={.spec.template.spec.containers[0].image}",
        ], 
        // Bound the call like the other kubectl invocations in this module so
        // an unresponsive cluster cannot hang the CLI.
        { timeout: 30000 });
        return stdout.trim() || null;
    }
    catch {
        // Workload may not exist, the cluster may be unreachable, or the call timed out
        return null;
    }
}
1252
/**
 * Collects the distinct sha256 image digests reported for one container across
 * the pods selected by the app.kubernetes.io/name and
 * app.kubernetes.io/instance labels.
 *
 * @param releaseName - Value matched against the app.kubernetes.io/instance label
 * @param workloadName - Value matched against the app.kubernetes.io/name label
 * @param namespace - The Kubernetes namespace
 * @param containerName - Only container statuses with this name are considered
 * @returns De-duplicated digests; an empty array when nothing matches or the
 *          kubectl call / JSON parsing fails or times out
 */
async function getPodImageDigests(releaseName, workloadName, namespace, containerName) {
    try {
        const { stdout } = await execa("kubectl", [
            "get",
            "pods",
            "-n",
            namespace,
            "-l",
            `app.kubernetes.io/name=${workloadName},app.kubernetes.io/instance=${releaseName}`,
            "-o",
            "json",
        ], 
        // Bound the call like the other kubectl invocations in this module so
        // an unresponsive cluster cannot hang the CLI.
        { timeout: 30000 });
        const data = JSON.parse(stdout);
        const digests = (data.items || [])
            .flatMap((pod) => pod.status?.containerStatuses || [])
            .filter((status) => status.name === containerName)
            .map((status) => extractImageDigest(status.imageID || ""))
            .filter(Boolean);
        // Pods of one workload usually share a digest; report each digest once.
        return [...new Set(digests)];
    }
    catch {
        // Pods may not exist, the cluster may be unreachable, or the output was not valid JSON
        return [];
    }
}
1275
/**
 * Gets actual deployed image tags and running image digests from Kubernetes.
 * The app is looked up as a Deployment; HPS and the HPS worker are looked up
 * as StatefulSets, so digest checks inspect the pods behind those sets.
 *
 * @param releaseName - The Helm release name (e.g., "rulebricks")
 * @param namespace - The Kubernetes namespace
 * @returns DeployedVersions object. Always returned: version/digest fields are
 *          null and digest lists are empty when the value cannot be resolved.
 */
export async function getDeployedImageVersions(releaseName, namespace) {
    const appName = `${releaseName}-app`;
    const hpsName = `${releaseName}-hps`;
    const hpsWorkerName = `${releaseName}-hps-worker`;
    // The six lookups are independent of each other, so issue them in one
    // batch instead of waiting for the image lookups before the digest lookups.
    const [appImage, hpsImage, hpsWorkerImage, appDigests, hpsDigests, hpsWorkerDigests,] = await Promise.all([
        getWorkloadImage("deployment", appName, namespace),
        getWorkloadImage("statefulset", hpsName, namespace),
        getWorkloadImage("statefulset", hpsWorkerName, namespace),
        getPodImageDigests(releaseName, appName, namespace, "app"),
        getPodImageDigests(releaseName, hpsName, namespace, "hps"),
        getPodImageDigests(releaseName, hpsWorkerName, namespace, "hps-worker"),
    ]);
    return {
        appVersion: appImage ? extractImageTag(appImage) : null,
        hpsVersion: hpsImage ? extractImageTag(hpsImage) : null,
        // Worker image tags carry a "worker-" prefix; strip it from the tag.
        hpsWorkerVersion: hpsWorkerImage
            ? extractImageTag(hpsWorkerImage)?.replace(/^worker-/, "") || null
            : null,
        // Only the first app digest is surfaced; HPS digests are reported in full.
        appDigest: appDigests[0] || null,
        hpsDigests,
        hpsWorkerDigests,
    };
}
825
1315
  /**