@rulebricks/cli 2.1.7 → 2.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117) hide show
  1. package/README.md +51 -16
  2. package/cluster-setup/aws/README.md +96 -47
  3. package/cluster-setup/aws/check-aws-access.sh +216 -52
  4. package/cluster-setup/aws/parameters.json +13 -0
  5. package/cluster-setup/aws/rulebricks-cluster.cfn.yaml +355 -0
  6. package/cluster-setup/azure/README.md +103 -55
  7. package/cluster-setup/azure/check-aks-prereqs.sh +236 -56
  8. package/cluster-setup/azure/parameters.json +30 -0
  9. package/cluster-setup/azure/rulebricks-cluster.bicep +546 -0
  10. package/cluster-setup/gcp/README.md +51 -34
  11. package/cluster-setup/gcp/check-gke-prereqs.sh +222 -60
  12. package/dist/commands/backup.d.ts +5 -0
  13. package/dist/commands/backup.js +104 -0
  14. package/dist/commands/deploy.d.ts +3 -1
  15. package/dist/commands/deploy.js +226 -326
  16. package/dist/commands/destroy.d.ts +1 -1
  17. package/dist/commands/destroy.js +73 -123
  18. package/dist/commands/init.d.ts +5 -1
  19. package/dist/commands/init.js +78 -54
  20. package/dist/commands/list.d.ts +1 -0
  21. package/dist/commands/list.js +74 -0
  22. package/dist/commands/open.d.ts +1 -1
  23. package/dist/commands/open.js +4 -12
  24. package/dist/commands/redeploy.d.ts +6 -0
  25. package/dist/commands/redeploy.js +310 -0
  26. package/dist/commands/restore.d.ts +5 -0
  27. package/dist/commands/restore.js +338 -0
  28. package/dist/commands/status.js +62 -49
  29. package/dist/commands/upgrade.js +74 -51
  30. package/dist/components/DNSWaitScreen.d.ts +5 -1
  31. package/dist/components/DNSWaitScreen.js +47 -41
  32. package/dist/components/Wizard/WizardContext.d.ts +157 -36
  33. package/dist/components/Wizard/WizardContext.js +872 -160
  34. package/dist/components/Wizard/steps/CloudProviderStep.js +192 -107
  35. package/dist/components/Wizard/steps/DomainStep.js +5 -24
  36. package/dist/components/Wizard/steps/ExternalServicesStep.d.ts +6 -0
  37. package/dist/components/Wizard/steps/ExternalServicesStep.js +645 -0
  38. package/dist/components/Wizard/steps/FeatureConfigStep.d.ts +2 -1
  39. package/dist/components/Wizard/steps/FeatureConfigStep.js +739 -425
  40. package/dist/components/Wizard/steps/FeaturesStep.js +31 -35
  41. package/dist/components/Wizard/steps/ObservabilityStep.d.ts +6 -0
  42. package/dist/components/Wizard/steps/ObservabilityStep.js +137 -0
  43. package/dist/components/Wizard/steps/ReviewStep.d.ts +2 -1
  44. package/dist/components/Wizard/steps/ReviewStep.js +56 -12
  45. package/dist/components/Wizard/steps/StorageStep.d.ts +9 -0
  46. package/dist/components/Wizard/steps/StorageStep.js +592 -0
  47. package/dist/components/Wizard/steps/SupabaseCredentialsStep.js +20 -21
  48. package/dist/components/Wizard/steps/VersionStep.js +45 -23
  49. package/dist/components/Wizard/steps/index.d.ts +3 -3
  50. package/dist/components/Wizard/steps/index.js +3 -3
  51. package/dist/components/common/CommandApproval.d.ts +12 -0
  52. package/dist/components/common/CommandApproval.js +91 -0
  53. package/dist/components/common/DeploymentPicker.d.ts +14 -0
  54. package/dist/components/common/DeploymentPicker.js +16 -0
  55. package/dist/components/common/index.d.ts +2 -0
  56. package/dist/components/common/index.js +2 -0
  57. package/dist/index.js +94 -62
  58. package/dist/lib/cloudCli.d.ts +134 -63
  59. package/dist/lib/cloudCli.js +512 -220
  60. package/dist/lib/clusterSetupDefaults.d.ts +30 -0
  61. package/dist/lib/clusterSetupDefaults.js +64 -0
  62. package/dist/lib/commandApproval.d.ts +26 -0
  63. package/dist/lib/commandApproval.js +114 -0
  64. package/dist/lib/config.d.ts +12 -10
  65. package/dist/lib/config.js +91 -33
  66. package/dist/lib/configFixtures.d.ts +5 -0
  67. package/dist/lib/configFixtures.js +513 -0
  68. package/dist/lib/deploymentHealth.d.ts +32 -0
  69. package/dist/lib/deploymentHealth.js +157 -0
  70. package/dist/lib/dns.d.ts +1 -1
  71. package/dist/lib/dns.js +19 -1
  72. package/dist/lib/dns.test.d.ts +1 -0
  73. package/dist/lib/dns.test.js +27 -0
  74. package/dist/lib/dockerHub.d.ts +12 -1
  75. package/dist/lib/dockerHub.js +18 -8
  76. package/dist/lib/helm.d.ts +4 -0
  77. package/dist/lib/helm.js +16 -0
  78. package/dist/lib/helmValues.d.ts +25 -0
  79. package/dist/lib/helmValues.js +1762 -289
  80. package/dist/lib/helmValues.test.d.ts +1 -0
  81. package/dist/lib/helmValues.test.js +966 -0
  82. package/dist/lib/htpasswd.d.ts +1 -0
  83. package/dist/lib/htpasswd.js +15 -0
  84. package/dist/lib/kubernetes.d.ts +124 -17
  85. package/dist/lib/kubernetes.js +576 -145
  86. package/dist/lib/secrets.d.ts +23 -0
  87. package/dist/lib/secrets.js +158 -0
  88. package/dist/lib/validateValues.d.ts +31 -0
  89. package/dist/lib/validateValues.js +253 -0
  90. package/dist/lib/versions.d.ts +82 -11
  91. package/dist/lib/versions.js +131 -31
  92. package/dist/lib/versions.test.d.ts +1 -0
  93. package/dist/lib/versions.test.js +81 -0
  94. package/dist/lib/wizardSteps.d.ts +14 -0
  95. package/dist/lib/wizardSteps.js +23 -0
  96. package/dist/lib/workloadIdentity.d.ts +26 -0
  97. package/dist/lib/workloadIdentity.js +323 -0
  98. package/dist/lib/workloadIdentity.test.d.ts +1 -0
  99. package/dist/lib/workloadIdentity.test.js +57 -0
  100. package/dist/types/index.d.ts +1860 -164
  101. package/dist/types/index.js +518 -295
  102. package/package.json +9 -4
  103. package/schema/values.schema.json +1934 -0
  104. package/cluster-setup/aws/cluster.yaml +0 -33
  105. package/cluster-setup/azure/main.bicep +0 -282
  106. package/cluster-setup/azure/main.parameters.json +0 -21
  107. package/dist/components/Wizard/steps/CredentialsStep.d.ts +0 -6
  108. package/dist/components/Wizard/steps/CredentialsStep.js +0 -22
  109. package/dist/components/Wizard/steps/DeploymentModeStep.d.ts +0 -5
  110. package/dist/components/Wizard/steps/DeploymentModeStep.js +0 -26
  111. package/dist/components/Wizard/steps/TierStep.d.ts +0 -6
  112. package/dist/components/Wizard/steps/TierStep.js +0 -29
  113. package/dist/lib/terraform.d.ts +0 -66
  114. package/dist/lib/terraform.js +0 -754
  115. package/terraform/aws/main.tf +0 -355
  116. package/terraform/azure/main.tf +0 -371
  117. package/terraform/gcp/main.tf +0 -407
package/README.md CHANGED
@@ -1,8 +1,8 @@
1
1
  ![Banner](./banner.png)
2
2
 
3
- The Rulebricks CLI is a management utility that automates the creation and maintenance of private Rulebricks clusters, helping you deploy Rulebricks in customizable, high-throughput configurations on AWS, GCP, or Azure.
3
+ The Rulebricks CLI is a management utility for configuring and deploying private Rulebricks instances onto Kubernetes clusters you already control.
4
4
 
5
- You can choose how much you would like the CLI to automate for you– use it to generate valid configuration values, automate infrastructure provisioning (via Terraform), software deployment (via Helm), or all of the above.
5
+ It focuses on generating valid Rulebricks configuration values, sizing the application from the selected cluster's available resources, and deploying the Helm chart.
6
6
 
7
7
  ## Installation
8
8
 
@@ -17,6 +17,8 @@ to deploy using this CLI. You will be
17
17
  requested for this key during project
18
18
  configuration.
19
19
 
20
+ You must also have an **available Kubernetes cluster** to deploy to. You can use the `cluster-setup` directory to easily create a standalone cluster for Rulebricks. These resources provision a cluster that meets the minimum requirements, along with the role/identity resources and object storage buckets required for a production deployment, and they double as documentation for teams looking to deploy Rulebricks to an existing cluster.
21
+
20
22
  Rulebricks requires TLS. You will require either external-dns on your cluster to automatically add DNS records, or you will need **access** to manually add **DNS records** for the subdomain(s) where you would like to access your private deployment from.
21
23
 
22
24
  Finally, you will need to have the following tools installed and ready on your machine:
@@ -24,12 +26,11 @@ Finally, you will need to have the following tools installed and ready on your m
24
26
  - **Node.js** >= 20
25
27
  - **kubectl** - Kubernetes CLI
26
28
  - **Helm** >= 3.0
27
- - **Terraform** >= 1.0 (for infrastructure provisioning)
28
- - Cloud CLI (`aws`, `gcloud`, or `az`) configured for your provider
29
+ - Cloud CLI (`aws`, `gcloud`, or `az`) configured for your provider if you want the wizard to discover clusters or refresh kubeconfig
29
30
 
30
31
  ## Cluster Setup
31
32
 
32
- If you want to create the Kubernetes cluster yourself, use the resources in `cluster-setup/` before running the CLI wizard. These files provide minimum compatible AWS, Azure, and GCP cluster setup guidance plus optional access checks. Monitoring destinations are configured later by the CLI wizard and Helm values, not by these cluster setup files.
33
+ Create or select a Kubernetes cluster before running the CLI wizard. If you need a starting point, use the resources in `cluster-setup/`; they provide minimum compatible AWS, Azure, and GCP setup guidance plus optional access checks. Monitoring destinations are configured later by the CLI wizard and Helm values, not by these cluster setup files.
33
34
 
34
35
  ```bash
35
36
  # AWS: optional access check, then create EKS with eksctl
@@ -51,7 +52,7 @@ GCP_REGION=us-central1 bash cluster-setup/gcp/check-gke-prereqs.sh
51
52
  # Follow cluster-setup/gcp/README.md for the gcloud create commands.
52
53
  ```
53
54
 
54
- After the cluster exists, update kubeconfig, then run `rulebricks init` and choose **Use existing Kubernetes cluster**. The existing Terraform provisioning path remains available, but native cloud setup is the clearest path when you want to own the cluster directly.
55
+ After the cluster exists, update kubeconfig, then run `rulebricks init`. The wizard can also refresh kubeconfig for EKS, GKE, or AKS when provider details are available.
55
56
 
56
57
  ## Quick Start
57
58
 
@@ -59,24 +60,58 @@ After the cluster exists, update kubeconfig, then run `rulebricks init` and choo
59
60
  # Configuration wizard (generates values.yaml)
60
61
  rulebricks init
61
62
 
62
- # Provision and/or deploy to your cluster
63
+ # Deploy to your cluster
63
64
  rulebricks deploy my-deployment
64
65
  ```
65
66
 
67
+ The generated Helm values pin one Rulebricks product version under
68
+ `global.version`. That single semantic version selects the app, HPS, and HPS
69
+ worker images together.
70
+
66
71
  ## Main Commands
67
72
 
68
- | Command | Description |
69
- | --------------------------- | -------------------------------------- |
70
- | `rulebricks init` | Interactive setup wizard |
71
- | `rulebricks deploy [name]` | Deploy to Kubernetes |
72
- | `rulebricks upgrade [name]` | Upgrade to a new version |
73
- | `rulebricks destroy [name]` | Remove a deployment |
74
- | `rulebricks status [name]` | Show deployment health |
75
- | `rulebricks logs [name]` | Inspect services |
76
- | `rulebricks open [name]` | Open the generated configuration files |
73
+ | Command | Description |
74
+ | --------------------------- | ---------------------------------------- |
75
+ | `rulebricks init` | Interactive setup wizard |
76
+ | `rulebricks deploy [name]` | Deploy to Kubernetes |
77
+ | `rulebricks upgrade [name]` | Upgrade to a new version |
78
+ | `rulebricks destroy [name]` | Remove a deployment |
79
+ | `rulebricks status [name]` | Show deployment health |
80
+ | `rulebricks logs [name]` | Inspect services |
81
+ | `rulebricks open [name]` | Open the generated configuration files |
82
+ | `rulebricks backup [name]` | Run an on-demand database backup |
83
+ | `rulebricks restore [name]` | Restore the database from object storage |
77
84
 
78
85
  Use `rulebricks -h` to explore all commands, and add `-h` to any command to learn more about a particular command's options.
79
86
 
87
+ ## Monitoring
88
+
89
+ Self-hosted deployments enable Prometheus monitoring by default. The wizard only asks whether you want to configure a Prometheus `remote_write` destination; you can skip that step if you do not yet have AWS Managed Prometheus, Azure Monitor managed Prometheus, Grafana Cloud, or another remote-write-compatible backend ready.
90
+
91
+ By default, generated Helm values install `kube-prometheus-stack`, scrape Kubernetes and cluster metrics, and add Rulebricks scrape targets for:
92
+
93
+ - App/admin API health: request counts, latency histograms, coarse rejection counts, and frontend error counts.
94
+ - HPS rule-engine traffic: request counts, latency histograms, coarse rejection counts, Kafka worker wait time, bulk/parallel item volume, and memory cache stats.
95
+ - Supporting infrastructure where available: Kafka JMX, ClickHouse metrics when ClickHouse is enabled, and Traefik's Prometheus endpoint. Traefik's ServiceMonitor remains an explicit opt-in after Prometheus Operator CRDs are installed.
96
+
97
+ Metrics intentionally use low-cardinality labels such as route template, method, status class, operation, and rejection reason. They do not include API keys, users, organizations, IP addresses, raw URLs, rule slugs, flow slugs, or exception messages.
98
+
99
+ Useful PromQL examples:
100
+
101
+ ```promql
102
+ histogram_quantile(0.95, sum(rate(rulebricks_hps_http_request_duration_seconds_bucket[5m])) by (le, route))
103
+ sum(rate(rulebricks_hps_rejections_total[5m])) by (route, reason)
104
+ histogram_quantile(0.95, sum(rate(rulebricks_hps_kafka_request_duration_seconds_bucket[5m])) by (le, operation))
105
+ sum(rate(rulebricks_hps_bulk_items_total[5m])) by (operation)
106
+ sum(rate(rulebricks_app_frontend_errors_total[5m])) by (source)
107
+ ```
108
+
109
+ ## Object Storage and Backups
110
+
111
+ The wizard now collects a shared object storage backend for every deployment. Rulebricks uses separate prefixes in that bucket for decision logs (`decision-logs/`) and self-hosted Supabase database backups (`db-backups/`).
112
+
113
+ Database backups are optional for self-hosted Supabase deployments. When enabled, the Helm chart schedules Barman base backups according to the configured cron schedule and retention window. You can also run `rulebricks backup <name>` to trigger an on-demand backup, or `rulebricks restore <name>` to list backups in object storage and interactively restore one after confirmation.
114
+
80
115
  ## Notes
81
116
 
82
117
  This CLI makes a uniquely wide variety of customization options available (multi-cloud, hybrid vs. self-hosted database deployment, custom email templates, etc.), and not all combinations have been validated.
@@ -1,74 +1,123 @@
1
1
  # AWS Cluster Setup
2
2
 
3
- Use these files to create a minimum EKS cluster that can run Rulebricks without using the Rulebricks CLI Terraform flow.
3
+ A compact, turnkey EKS cluster for Rulebricks. One CloudFormation stack creates
4
+ the cluster **and** the S3 bucket + Amazon Managed Prometheus workspace the
5
+ platform needs, wired to workloads via **EKS Pod Identity** (AWS's recommended
6
+ mechanism for new clusters — no OIDC provider to manage).
7
+
8
+ `eksctl` is not used: it can create a cluster but not the bucket or AMP
9
+ workspace, so the full picture lives in one stack instead.
4
10
 
5
11
  ## Files
6
12
 
7
- - `cluster.yaml` is the minimum compatible `eksctl` cluster config, using ARM64 managed nodes and EBS CSI support.
8
- - `check-aws-access.sh` verifies AWS identity, common EKS/EC2/IAM permissions, quota, `eksctl`, `kubectl`, and Helm.
13
+ - `rulebricks-cluster.cfn.yaml` — VPC, EKS cluster + managed node group, EBS CSI + Pod Identity add-ons, one S3 data bucket, AMP workspace, and a single IAM role. (The CLI creates the namespace-scoped Pod Identity associations at deploy time.)
14
+ - `parameters.json` — sample parameter overrides (omit any to use template defaults).
15
+ - `check-aws-access.sh` — verifies identity, service access, IAM role-creation rights, quota, kubectl/helm.
9
16
 
10
- ## Core Cluster Parameters
17
+ ## One role, one bucket
11
18
 
12
- - Cluster name: `rulebricks-cluster` (`cluster.yaml` -> `metadata.name`)
13
- - Region: `us-east-1` (`cluster.yaml` -> `metadata.region`)
14
- - Kubernetes version: `1.34` (`cluster.yaml` -> `metadata.version`)
15
- - Node count: `4` (`cluster.yaml` -> `managedNodeGroups[0].desiredCapacity`)
16
- - Instance type: `c8g.large` (`cluster.yaml` -> `managedNodeGroups[0].instanceType`)
17
- - Disk size (GB): `50` (`cluster.yaml` -> `managedNodeGroups[0].volumeSize`)
18
- - Disk type: `gp3` (`cluster.yaml` -> `managedNodeGroups[0].volumeType`)
19
+ A single IAM role, `<cluster>-rulebricks`, is bound to the ServiceAccounts that
20
+ need cloud access via `EKS::PodIdentityAssociation`. All data lives in one
21
+ bucket, `<cluster>-data-<account-id>`, under per-purpose prefixes.
19
22
 
20
- ## Check Access
23
+ | Path | Service account | Permission / target |
24
+ | --------------------------------- | ------------------------------------ | --------------------------------------------------------- |
25
+ | Decision logs (Vector → S3) | `vector` | `s3:*Object`/`ListBucket` → `<cluster>-data-<account-id>/decision-logs/` |
26
+ | DB backups (job → S3) | `rulebricks-<deploymentName>-backup` | `s3:*Object`/`ListBucket` → `<cluster>-data-<account-id>/db-backups/` |
27
+ | Metrics (Prometheus remote write) | `prometheus` | `aps:RemoteWrite` → AMP workspace |
21
28
 
22
- ```bash
23
- AWS_REGION=us-east-1 bash check-aws-access.sh
24
- ```
29
+ The bucket is encrypted and has public access blocked.
30
+
31
+ > **This stack does not need a deployment name.** `EKS::PodIdentityAssociation` is
32
+ > `namespace`-scoped, so the **Rulebricks CLI creates the associations** (vector / backup /
33
+ > prometheus → this role) at `rulebricks deploy` time. The stack only provisions the
34
+ > deployment-independent role, bucket, and AMP workspace, so one cluster can host many deployments.
35
+
36
+ ## Core cluster parameters
37
+
38
+ `ClusterName` (`rulebricks-cluster`), `KubernetesVersion` (`1.34`),
39
+ `NodeInstanceType` (`c7i.xlarge`), `NodeDesiredCapacity`/`NodeMinSize`/`NodeMaxSize`
40
+ (`2`/`2`/`4`), `NodeVolumeSizeGiB` (`50`). The standard (core) nodegroup runs
41
+ the always-on services on two to four 4-vCPU nodes; burst capacity lives in
42
+ the dedicated burst nodegroup below.
43
+
44
+ ### Burst worker nodegroup (default on)
45
+
46
+ `EnableBurstPool` (`"true"`), `BurstInstanceType` (`c7i.4xlarge`, 16 vCPU),
47
+ `BurstNodeMaxSize` (`1`). One large on-demand node that scales 0 -> 1 on
48
+ demand, labeled and tainted `rulebricks.com/pool=burst`: the Rulebricks chart
49
+ makes workers tolerate the taint and softly prefer the label out of the box,
50
+ so the scaled-out worker fleet lands here while core services stay on the
51
+ standard nodegroup. Sizing math: 2 x 4 vCPU core floor + 16 vCPU burst =
52
+ 24 vCPU running steady-state at full burst, and exactly 32 vCPU even with
53
+ the core nodegroup at its 4-node max. Note: EKS has no parked-VM equivalent of AKS
54
+ Deallocate, so each burst cold-provisions the node (~2-3 min); the warm
55
+ worker floor on the core nodes carries traffic during provisioning, and a
56
+ Karpenter NodePool carrying the same label/taint is the planned fast path.
25
57
 
26
- ## Create The Cluster
58
+ > `NodeInstanceType` and the node AMI are coupled: `c7i` is x86, so the template
59
+ > uses `AL2023_x86_64_STANDARD`. If you switch to a Graviton/ARM type (e.g.
60
+ > `c8g`), change `AmiType` to `AL2023_ARM_64_STANDARD` or the nodes won't boot.
61
+
62
+ ## Region
63
+
64
+ CloudFormation is regional — the stack deploys to whatever region your CLI call
65
+ targets. Set it with `--region` (or `AWS_REGION` / your profile), not a
66
+ parameter. Availability zones auto-resolve to that region.
67
+
68
+ ## Check access
27
69
 
28
70
  ```bash
29
- eksctl create cluster -f cluster.yaml
71
+ AWS_REGION=us-east-1 bash check-aws-access.sh
30
72
  ```
31
73
 
32
- `eksctl` updates kubeconfig automatically. To refresh it manually:
74
+ The stack creates named IAM roles, so the deploying principal must be able to
75
+ create roles, and the deploy must pass `--capabilities CAPABILITY_NAMED_IAM`
76
+ (below). The check script flags this.
77
+
78
+ ## Create the cluster
33
79
 
34
80
  ```bash
81
+ aws cloudformation create-stack \
82
+ --stack-name rulebricks-cluster \
83
+ --region us-east-1 \
84
+ --template-body file://rulebricks-cluster.cfn.yaml \
85
+ --parameters file://parameters.json \
86
+ --capabilities CAPABILITY_NAMED_IAM
87
+
88
+ aws cloudformation wait stack-create-complete \
89
+ --stack-name rulebricks-cluster --region us-east-1
90
+
35
91
  aws eks update-kubeconfig --name rulebricks-cluster --region us-east-1
36
92
  ```
37
93
 
38
- Use `rulebricks init` with **Use existing Kubernetes cluster** after kubeconfig works.
94
+ `CAPABILITY_NAMED_IAM` is a single inline flag on the deploy call (no
95
+ prerequisite step) and is required because the role has an explicit name. Run
96
+ `rulebricks init` once kubeconfig works, then select this cluster. Stack outputs
97
+ give `DataBucketName`, `RulebricksRoleArn`, and the AMP `remote_write` URL for
98
+ the CLI.
39
99
 
40
- ## Optional Identity Setup
100
+ ## Delete the cluster
41
101
 
42
- If you use S3 decision-log export or AWS Managed Prometheus remote write, create IAM roles for the Kubernetes service accounts rendered by the CLI:
102
+ Run `rulebricks destroy <deployment-name>` first so Kubernetes removes
103
+ LoadBalancer services and PVC-backed EBS volumes. CloudFormation **cannot delete
104
+ non-empty S3 buckets**, so empty them before deleting the stack:
43
105
 
44
106
  ```bash
45
- NAMESPACE=rulebricks-demo
46
- CLUSTER=rulebricks-cluster
47
- REGION=us-east-1
48
107
  ACCOUNT_ID="$(aws sts get-caller-identity --query Account --output text)"
108
+ aws s3 rm "s3://rulebricks-cluster-data-${ACCOUNT_ID}" --recursive
49
109
 
50
- eksctl utils associate-iam-oidc-provider \
51
- --cluster "$CLUSTER" \
52
- --region "$REGION" \
53
- --approve
54
-
55
- eksctl create iamserviceaccount \
56
- --cluster "$CLUSTER" \
57
- --region "$REGION" \
58
- --namespace "$NAMESPACE" \
59
- --name vector \
60
- --attach-policy-arn arn:aws:iam::"$ACCOUNT_ID":policy/<vector-s3-policy> \
61
- --role-name rulebricks-vector \
62
- --approve
63
-
64
- eksctl create iamserviceaccount \
65
- --cluster "$CLUSTER" \
66
- --region "$REGION" \
67
- --namespace "$NAMESPACE" \
68
- --name prometheus \
69
- --attach-policy-arn arn:aws:iam::aws:policy/AmazonPrometheusRemoteWriteAccess \
70
- --role-name rulebricks-prometheus \
71
- --approve
110
+ aws cloudformation delete-stack --stack-name rulebricks-cluster --region us-east-1
111
+ aws cloudformation wait stack-delete-complete \
112
+ --stack-name rulebricks-cluster --region us-east-1
72
113
  ```
73
114
 
74
- Enter the created role ARNs when prompted by the CLI.
115
+ The stack is the teardown boundary (analogous to the Azure resource group):
116
+ deleting it removes the cluster, node group, VPC, the IAM role, Pod Identity
117
+ associations, AMP workspace, and the (emptied) bucket.
118
+
119
+ ## Notes
120
+
121
+ - Rulebricks uses a Kubernetes LoadBalancer service; EKS provisions the load balancer and its `80`/`443` security-group rules. In a locked-down VPC, ensure public inbound `80`/`443` can reach it for DNS and cert-manager HTTP-01 validation.
122
+ - Pod Identity requires the `eks-pod-identity-agent` add-on, which the stack installs.
123
+ - To bring your own bucket or AMP workspace, replace the corresponding resources with parameters and references (not enabled by default to keep the stack compact).
@@ -1,78 +1,242 @@
1
1
  #!/usr/bin/env bash
2
+ # Rulebricks AWS / EKS prerequisite check.
3
+ #
4
+ # Prints a short pass/fail report and a final READY / NOT READY verdict
5
+ # with the exact actions you need to take before deploying the CloudFormation
6
+ # stack.
7
+ #
8
+ # Env vars:
9
+ # AWS_REGION / AWS_DEFAULT_REGION Region to check (default: us-east-1)
10
+ # AWS_PROFILE Optional named profile to verify
11
+ # VERBOSE=1 Print raw AWS error messages inline
12
+
2
13
  set -euo pipefail
3
14
 
15
+ if [[ -z "${BASH_VERSION:-}" ]]; then
16
+ exec bash "$0" "$@"
17
+ fi
18
+
19
+ export AWS_PAGER=""
20
+
4
21
  REGION="${AWS_REGION:-${AWS_DEFAULT_REGION:-us-east-1}}"
5
22
  REQUIRED_VCPU=8
23
+ VERBOSE="${VERBOSE:-0}"
24
+
25
+ ACTIONS=()
26
+ BLOCKERS=0
27
+
28
+ # ---------- helpers ----------
6
29
 
7
30
  require_cmd() {
8
31
  command -v "$1" >/dev/null 2>&1 || {
9
- echo "Missing required command: $1" >&2
32
+ printf "ERROR: required command not found: %s\n" "$1" >&2
10
33
  exit 1
11
34
  }
12
35
  }
13
36
 
14
- check_aws() {
15
- echo "Checking AWS identity..."
16
- aws sts get-caller-identity --output table
17
- echo
37
+ # Run an aws command. Sets AWS_STDOUT / AWS_STDERR / AWS_RC. Never aborts.
38
+ aws_run() {
39
+ AWS_STDOUT=""; AWS_STDERR=""; AWS_RC=0
40
+ local _err
41
+ _err="$(mktemp)"
42
+ AWS_STDOUT="$(aws "$@" 2>"$_err")" || AWS_RC=$?
43
+ AWS_STDERR="$(cat "$_err")"
44
+ rm -f "$_err"
45
+ if [[ "$VERBOSE" == "1" && -n "$AWS_STDERR" ]]; then
46
+ printf " debug: %s\n" "${AWS_STDERR%%$'\n'*}" >&2
47
+ fi
48
+ return "$AWS_RC"
18
49
  }
19
50
 
20
- check_permission() {
21
- local label="$1"
22
- shift
23
- if "$@" >/dev/null 2>&1; then
24
- echo "OK: $label"
25
- else
26
- echo "WARN: Could not verify $label"
27
- echo " Command failed: $*"
28
- fi
51
+ is_auth_error() {
52
+ [[ "$AWS_STDERR" == *"ExpiredToken"* ]] && return 0
53
+ [[ "$AWS_STDERR" == *"InvalidClientTokenId"* ]] && return 0
54
+ [[ "$AWS_STDERR" == *"UnrecognizedClientException"* ]] && return 0
55
+ [[ "$AWS_STDERR" == *"Unable to locate credentials"* ]] && return 0
56
+ [[ "$AWS_STDERR" == *"SignatureDoesNotMatch"* ]] && return 0
57
+ [[ "$AWS_STDERR" == *"TokenRefreshRequired"* ]] && return 0
58
+ [[ "$AWS_STDERR" == *"SSOTokenLoadError"* ]] && return 0
59
+ [[ "$AWS_STDERR" == *"sso login"* ]] && return 0
60
+ return 1
29
61
  }
30
62
 
31
- check_quota() {
32
- echo "Checking regional on-demand vCPU quota in $REGION..."
33
- local quota
34
- quota="$(aws service-quotas get-service-quota \
35
- --service-code ec2 \
36
- --quota-code L-1216C47A \
37
- --region "$REGION" \
38
- --query 'Quota.Value' \
39
- --output text 2>/dev/null || true)"
63
+ row() {
64
+ printf " %-50s %s\n" "$1" "$2"
65
+ }
40
66
 
41
- if [[ -z "$quota" || "$quota" == "None" ]]; then
42
- echo "WARN: Could not read EC2 on-demand vCPU quota."
43
- return
44
- fi
67
+ mark_blocker() { BLOCKERS=$((BLOCKERS + 1)); }
68
+ add_action() { ACTIONS+=("$1"); }
45
69
 
46
- local quota_int="${quota%.*}"
47
- if (( quota_int < REQUIRED_VCPU )); then
48
- echo "WARN: Quota may be too low: ${quota} vCPU available, ${REQUIRED_VCPU}+ recommended for the included cluster config."
70
+ login_hint() {
71
+ if [[ -n "${AWS_PROFILE:-}" ]]; then
72
+ printf "aws sso login --profile %s (or refresh credentials for profile '%s')" "$AWS_PROFILE" "$AWS_PROFILE"
49
73
  else
50
- echo "OK: EC2 on-demand vCPU quota is ${quota}."
74
+ printf "aws sso login (or 'aws configure' to set up credentials)"
51
75
  fi
52
76
  }
53
77
 
78
+ # ---------- pre-flight ----------
79
+ # Note: eksctl is NOT required. The cluster is deployed via a single
80
+ # CloudFormation stack, so only the AWS CLI plus kubectl/helm are needed.
81
+
54
82
  require_cmd aws
55
- require_cmd eksctl
56
83
  require_cmd kubectl
57
84
  require_cmd helm
58
85
 
59
- echo "Rulebricks AWS access checks"
60
- echo "Region: $REGION"
61
- echo
62
-
63
- check_aws
64
- check_permission "EKS access" aws eks list-clusters --region "$REGION"
65
- check_permission "EC2 VPC access" aws ec2 describe-vpcs --region "$REGION" --max-items 5
66
- check_permission "IAM access" aws iam get-user
67
- check_permission "ECR public image pull path" aws ecr-public describe-registries --region us-east-1
68
- check_quota
69
-
70
- echo
71
- echo "Checking local Kubernetes tools..."
72
- eksctl version >/dev/null
73
- kubectl version --client=true >/dev/null
74
- helm version >/dev/null
75
- echo "OK: eksctl, kubectl, and Helm are installed."
76
-
77
- echo
78
- echo "AWS access checks completed. Warnings may require cloud-admin review before cluster creation."
86
+ printf "Rulebricks AWS prerequisite check\n"
87
+ printf " Region: %s\n" "$REGION"
88
+ [[ -n "${AWS_PROFILE:-}" ]] && printf " Profile: %s\n" "$AWS_PROFILE"
89
+ printf "\n"
90
+
91
+ # ---------- 1. Authentication ----------
92
+ AUTH_OK=0
93
+ ACCOUNT_ID=""
94
+ CALLER_ARN=""
95
+
96
+ if aws_run sts get-caller-identity --query "Account" --output text; then
97
+ ACCOUNT_ID="$AWS_STDOUT"
98
+ if aws_run sts get-caller-identity --query "Arn" --output text; then
99
+ CALLER_ARN="$AWS_STDOUT"
100
+ fi
101
+ row "AWS credentials valid" "OK ($ACCOUNT_ID)"
102
+ [[ -n "$CALLER_ARN" ]] && row "Caller identity" "$CALLER_ARN"
103
+ AUTH_OK=1
104
+ else
105
+ if is_auth_error; then
106
+ row "AWS credentials valid" "FAIL - credentials missing or expired"
107
+ else
108
+ row "AWS credentials valid" "FAIL - ${AWS_STDERR%%$'\n'*}"
109
+ fi
110
+ add_action "Refresh credentials: $(login_hint)"
111
+ mark_blocker
112
+ fi
113
+
114
+ if [[ $AUTH_OK -eq 0 ]]; then
115
+ printf "\nRemaining checks skipped - fix authentication first.\n"
116
+ printf "\n========================================\n"
117
+ printf "RESULT: NOT READY\n"
118
+ printf "========================================\n"
119
+ printf "Required actions:\n"
120
+ i=1
121
+ for a in "${ACTIONS[@]}"; do
122
+ printf " %d. %s\n" "$i" "$a"
123
+ i=$((i + 1))
124
+ done
125
+ exit 1
126
+ fi
127
+
128
+ # ---------- 2. Service access ----------
129
+ # These cover what the CloudFormation stack touches: EKS, EC2/VPC, IAM (roles +
130
+ # Pod Identity associations), S3 (log/backup buckets), APS (managed Prometheus),
131
+ # and CloudFormation itself.
132
+ declare -a missing_access=()
133
+
134
+ aws_run eks list-clusters --region "$REGION" --output text >/dev/null \
135
+ || missing_access+=("eks:ListClusters")
136
+ aws_run ec2 describe-vpcs --region "$REGION" --max-items 5 --output text >/dev/null \
137
+ || missing_access+=("ec2:DescribeVpcs")
138
+ aws_run iam list-roles --max-items 5 --output text >/dev/null \
139
+ || missing_access+=("iam:ListRoles")
140
+ aws_run s3api list-buckets --output text >/dev/null \
141
+ || missing_access+=("s3:ListAllMyBuckets")
142
+ aws_run aps list-workspaces --region "$REGION" --output text >/dev/null \
143
+ || missing_access+=("aps:ListWorkspaces")
144
+ aws_run cloudformation list-stacks --region "$REGION" --output text >/dev/null \
145
+ || missing_access+=("cloudformation:ListStacks")
146
+
147
+ if [[ ${#missing_access[@]} -eq 0 ]]; then
148
+ row "EKS/EC2/IAM/S3/APS/CFN access" "OK"
149
+ else
150
+ row "EKS/EC2/IAM/S3/APS/CFN access" "WARN - missing: ${missing_access[*]}"
151
+ add_action "Ask your AWS admin to grant the missing IAM actions in $REGION: ${missing_access[*]}"
152
+ fi
153
+
154
+ # ---------- 3. IAM role-creation rights (CAPABILITY_NAMED_IAM) ----------
155
+ # The stack creates named IAM roles, so the deploying principal must be allowed
156
+ # to create roles and attach policies. We can't fully simulate this without
157
+ # iam:SimulatePrincipalPolicy, but we can flag whether the caller is obviously
158
+ # an admin vs. a scoped role so the operator knows to expect a capability prompt.
159
+ if aws_run iam simulate-principal-policy \
160
+ --policy-source-arn "$CALLER_ARN" \
161
+ --action-names iam:CreateRole iam:AttachRolePolicy iam:PutRolePolicy \
162
+ --query "EvaluationResults[?EvalDecision=='allowed'] | length(@)" \
163
+ --output text; then
164
+ allowed="$AWS_STDOUT"
165
+ if [[ "$allowed" == "3" ]]; then
166
+ row "IAM role-creation rights" "OK"
167
+ else
168
+ row "IAM role-creation rights" "WARN - some IAM create/attach actions denied"
169
+ add_action "The stack creates named IAM roles (deploy needs CAPABILITY_NAMED_IAM). Ensure your principal can iam:CreateRole / iam:AttachRolePolicy / iam:PutRolePolicy, or have an admin deploy."
170
+ fi
171
+ else
172
+ # SimulatePrincipalPolicy itself is often denied for non-admins; don't block.
173
+ row "IAM role-creation rights" "WARN - could not simulate (needs iam:SimulatePrincipalPolicy)"
174
+ add_action "Could not verify IAM role-creation rights. The stack creates named IAM roles and must be deployed with --capabilities CAPABILITY_NAMED_IAM by a principal allowed to create roles."
175
+ fi
176
+
177
+ # ---------- 4. EC2 on-demand vCPU quota ----------
178
+ quota_label="EC2 on-demand vCPU quota in $REGION (need ${REQUIRED_VCPU}+)"
179
+ if aws_run service-quotas get-service-quota \
180
+ --service-code ec2 \
181
+ --quota-code L-1216C47A \
182
+ --region "$REGION" \
183
+ --query "Quota.Value" \
184
+ --output text; then
185
+ quota="$AWS_STDOUT"
186
+ if [[ -z "$quota" || "$quota" == "None" ]]; then
187
+ row "$quota_label" "WARN - empty response"
188
+ add_action "Check the EC2 'Running On-Demand Standard vCPUs' quota in the AWS console: Service Quotas → EC2."
189
+ else
190
+ quota_int="${quota%.*}"
191
+ if (( quota_int < REQUIRED_VCPU )); then
192
+ row "$quota_label" "WARN ($quota available)"
193
+ add_action "Request a quota increase: AWS console → Service Quotas → EC2 → 'Running On-Demand Standard vCPUs' in $REGION."
194
+ else
195
+ row "$quota_label" "OK ($quota available)"
196
+ fi
197
+ fi
198
+ else
199
+ row "$quota_label" "WARN - could not read quota"
200
+ add_action "Manually verify EC2 vCPU quota in the AWS console (Service Quotas → EC2) for $REGION."
201
+ fi
202
+
203
+ # ---------- 5. Local tools ----------
204
+ missing_tools=()
205
+ kubectl version --client=true >/dev/null 2>&1 || missing_tools+=("kubectl")
206
+ helm version >/dev/null 2>&1 || missing_tools+=("helm")
207
+
208
+ if [[ ${#missing_tools[@]} -gt 0 ]]; then
209
+ uniq_tools="$(printf '%s\n' "${missing_tools[@]}" | sort -u | tr '\n' ' ')"
210
+ row "Local tools (kubectl, helm)" "FAIL - missing/broken: ${uniq_tools% }"
211
+ add_action "Install/repair: ${uniq_tools% }"
212
+ mark_blocker
213
+ else
214
+ row "Local tools (kubectl, helm)" "OK"
215
+ fi
216
+
217
+ # ---------- summary ----------
218
+ printf "\n========================================\n"
219
+ if [[ $BLOCKERS -eq 0 && ${#ACTIONS[@]} -eq 0 ]]; then
220
+ printf "RESULT: READY - you can deploy the CloudFormation stack.\n"
221
+ printf "========================================\n"
222
+ exit 0
223
+ elif [[ $BLOCKERS -eq 0 ]]; then
224
+ printf "RESULT: READY WITH WARNINGS\n"
225
+ printf "========================================\n"
226
+ printf "The deploy should work, but address these first if possible:\n"
227
+ else
228
+ printf "RESULT: NOT READY\n"
229
+ printf "========================================\n"
230
+ printf "Required actions:\n"
231
+ fi
232
+
233
+ i=1
234
+ for a in "${ACTIONS[@]}"; do
235
+ printf " %d. %s\n" "$i" "$a"
236
+ i=$((i + 1))
237
+ done
238
+
239
+ printf "\nRe-run this script after completing the actions above.\n"
240
+ printf "(Set VERBOSE=1 to see raw AWS error messages.)\n"
241
+
242
+ [[ $BLOCKERS -gt 0 ]] && exit 1 || exit 0
@@ -0,0 +1,13 @@
1
+ [
2
+ { "ParameterKey": "ClusterName", "ParameterValue": "rulebricks-cluster" },
3
+ { "ParameterKey": "KubernetesVersion", "ParameterValue": "1.34" },
4
+ { "ParameterKey": "NodeInstanceType", "ParameterValue": "c7i.xlarge" },
5
+ { "ParameterKey": "NodeDesiredCapacity", "ParameterValue": "2" },
6
+ { "ParameterKey": "NodeMinSize", "ParameterValue": "2" },
7
+ { "ParameterKey": "NodeMaxSize", "ParameterValue": "4" },
8
+ { "ParameterKey": "EnableBurstPool", "ParameterValue": "true" },
9
+ { "ParameterKey": "BurstInstanceType", "ParameterValue": "c7i.4xlarge" },
10
+ { "ParameterKey": "BurstNodeMaxSize", "ParameterValue": "1" },
11
+ { "ParameterKey": "NodeVolumeSizeGiB", "ParameterValue": "50" },
12
+ { "ParameterKey": "VpcCidr", "ParameterValue": "10.0.0.0/16" }
13
+ ]