@rulebricks/cli 2.1.6 → 2.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. package/README.md +75 -14
  2. package/cluster-setup/aws/README.md +123 -0
  3. package/cluster-setup/aws/check-aws-access.sh +242 -0
  4. package/cluster-setup/aws/parameters.json +13 -0
  5. package/cluster-setup/aws/rulebricks-cluster.cfn.yaml +355 -0
  6. package/cluster-setup/azure/README.md +141 -0
  7. package/cluster-setup/azure/check-aks-prereqs.sh +276 -0
  8. package/cluster-setup/azure/parameters.json +30 -0
  9. package/cluster-setup/azure/rulebricks-cluster.bicep +546 -0
  10. package/cluster-setup/gcp/README.md +189 -0
  11. package/cluster-setup/gcp/check-gke-prereqs.sh +260 -0
  12. package/dist/commands/backup.d.ts +5 -0
  13. package/dist/commands/backup.js +104 -0
  14. package/dist/commands/deploy.d.ts +3 -1
  15. package/dist/commands/deploy.js +226 -326
  16. package/dist/commands/destroy.d.ts +1 -1
  17. package/dist/commands/destroy.js +73 -123
  18. package/dist/commands/init.d.ts +5 -1
  19. package/dist/commands/init.js +78 -47
  20. package/dist/commands/list.d.ts +1 -0
  21. package/dist/commands/list.js +74 -0
  22. package/dist/commands/open.d.ts +1 -1
  23. package/dist/commands/open.js +4 -12
  24. package/dist/commands/redeploy.d.ts +6 -0
  25. package/dist/commands/redeploy.js +310 -0
  26. package/dist/commands/restore.d.ts +5 -0
  27. package/dist/commands/restore.js +338 -0
  28. package/dist/commands/status.js +62 -49
  29. package/dist/commands/upgrade.js +74 -51
  30. package/dist/components/DNSWaitScreen.d.ts +5 -1
  31. package/dist/components/DNSWaitScreen.js +47 -41
  32. package/dist/components/Wizard/WizardContext.d.ts +174 -29
  33. package/dist/components/Wizard/WizardContext.js +896 -91
  34. package/dist/components/Wizard/steps/CloudProviderStep.js +192 -102
  35. package/dist/components/Wizard/steps/DomainStep.js +5 -24
  36. package/dist/components/Wizard/steps/ExternalServicesStep.d.ts +6 -0
  37. package/dist/components/Wizard/steps/ExternalServicesStep.js +645 -0
  38. package/dist/components/Wizard/steps/FeatureConfigStep.d.ts +2 -1
  39. package/dist/components/Wizard/steps/FeatureConfigStep.js +959 -248
  40. package/dist/components/Wizard/steps/FeaturesStep.js +31 -35
  41. package/dist/components/Wizard/steps/ObservabilityStep.d.ts +6 -0
  42. package/dist/components/Wizard/steps/ObservabilityStep.js +137 -0
  43. package/dist/components/Wizard/steps/ReviewStep.d.ts +2 -1
  44. package/dist/components/Wizard/steps/ReviewStep.js +56 -7
  45. package/dist/components/Wizard/steps/StorageStep.d.ts +9 -0
  46. package/dist/components/Wizard/steps/StorageStep.js +592 -0
  47. package/dist/components/Wizard/steps/SupabaseCredentialsStep.js +20 -21
  48. package/dist/components/Wizard/steps/VersionStep.js +45 -23
  49. package/dist/components/Wizard/steps/index.d.ts +3 -3
  50. package/dist/components/Wizard/steps/index.js +3 -3
  51. package/dist/components/common/CommandApproval.d.ts +12 -0
  52. package/dist/components/common/CommandApproval.js +91 -0
  53. package/dist/components/common/DeploymentPicker.d.ts +14 -0
  54. package/dist/components/common/DeploymentPicker.js +16 -0
  55. package/dist/components/common/index.d.ts +2 -0
  56. package/dist/components/common/index.js +2 -0
  57. package/dist/index.js +94 -62
  58. package/dist/lib/cloudCli.d.ts +134 -63
  59. package/dist/lib/cloudCli.js +512 -220
  60. package/dist/lib/clusterSetupDefaults.d.ts +30 -0
  61. package/dist/lib/clusterSetupDefaults.js +64 -0
  62. package/dist/lib/commandApproval.d.ts +26 -0
  63. package/dist/lib/commandApproval.js +114 -0
  64. package/dist/lib/config.d.ts +12 -10
  65. package/dist/lib/config.js +91 -33
  66. package/dist/lib/configFixtures.d.ts +5 -0
  67. package/dist/lib/configFixtures.js +513 -0
  68. package/dist/lib/deploymentHealth.d.ts +32 -0
  69. package/dist/lib/deploymentHealth.js +157 -0
  70. package/dist/lib/dns.d.ts +1 -1
  71. package/dist/lib/dns.js +19 -1
  72. package/dist/lib/dns.test.d.ts +1 -0
  73. package/dist/lib/dns.test.js +27 -0
  74. package/dist/lib/dockerHub.d.ts +12 -1
  75. package/dist/lib/dockerHub.js +18 -8
  76. package/dist/lib/helm.d.ts +4 -0
  77. package/dist/lib/helm.js +16 -0
  78. package/dist/lib/helmValues.d.ts +25 -0
  79. package/dist/lib/helmValues.js +1937 -259
  80. package/dist/lib/helmValues.test.d.ts +1 -0
  81. package/dist/lib/helmValues.test.js +966 -0
  82. package/dist/lib/htpasswd.d.ts +1 -0
  83. package/dist/lib/htpasswd.js +15 -0
  84. package/dist/lib/kubernetes.d.ts +126 -13
  85. package/dist/lib/kubernetes.js +624 -134
  86. package/dist/lib/secrets.d.ts +23 -0
  87. package/dist/lib/secrets.js +158 -0
  88. package/dist/lib/validateValues.d.ts +31 -0
  89. package/dist/lib/validateValues.js +253 -0
  90. package/dist/lib/versions.d.ts +82 -11
  91. package/dist/lib/versions.js +131 -31
  92. package/dist/lib/versions.test.d.ts +1 -0
  93. package/dist/lib/versions.test.js +81 -0
  94. package/dist/lib/wizardSteps.d.ts +14 -0
  95. package/dist/lib/wizardSteps.js +23 -0
  96. package/dist/lib/workloadIdentity.d.ts +26 -0
  97. package/dist/lib/workloadIdentity.js +323 -0
  98. package/dist/lib/workloadIdentity.test.d.ts +1 -0
  99. package/dist/lib/workloadIdentity.test.js +57 -0
  100. package/dist/types/index.d.ts +2152 -95
  101. package/dist/types/index.js +554 -286
  102. package/package.json +10 -4
  103. package/schema/values.schema.json +1934 -0
  104. package/dist/components/Wizard/steps/CredentialsStep.d.ts +0 -6
  105. package/dist/components/Wizard/steps/CredentialsStep.js +0 -22
  106. package/dist/components/Wizard/steps/DeploymentModeStep.d.ts +0 -5
  107. package/dist/components/Wizard/steps/DeploymentModeStep.js +0 -26
  108. package/dist/components/Wizard/steps/TierStep.d.ts +0 -6
  109. package/dist/components/Wizard/steps/TierStep.js +0 -29
  110. package/dist/lib/terraform.d.ts +0 -66
  111. package/dist/lib/terraform.js +0 -754
  112. package/terraform/aws/main.tf +0 -355
  113. package/terraform/azure/main.tf +0 -371
  114. package/terraform/gcp/main.tf +0 -407
package/README.md CHANGED
@@ -1,8 +1,8 @@
1
1
  ![Banner](./banner.png)
2
2
 
3
- The Rulebricks CLI is a management utility that automates the creation and maintenance of private Rulebricks clusters, helping you deploy Rulebricks in customizable, high-throughput configurations on AWS, GCP, or Azure.
3
+ The Rulebricks CLI is a management utility for configuring and deploying private Rulebricks instances onto Kubernetes clusters you already control.
4
4
 
5
- You can choose how much you would like the CLI to automate for you– use it to generate valid configuration values, automate infrastructure provisioning (via Terraform), software deployment (via Helm), or all of the above.
5
+ It focuses on generating valid Rulebricks configuration values, sizing the application from the selected cluster's available resources, and deploying the Helm chart.
6
6
 
7
7
  ## Installation
8
8
 
@@ -17,6 +17,8 @@ to deploy using this CLI. You will be
17
17
  requested for this key during project
18
18
  configuration.
19
19
 
20
+ You must also have an **available Kubernetes cluster** to deploy to. You can use the `cluster-setup` directory to easily create a standalone cluster for Rulebricks. These resources satisfy the minimum cluster requirements, role/identity resources, and object storage buckets required for a production deployment, and double as documentation for teams looking to deploy Rulebricks to an existing cluster.
21
+
20
22
  Rulebricks requires TLS. You will require either external-dns on your cluster to automatically add DNS records, or you will need **access** to manually add **DNS records** for the subdomain(s) where you would like to access your private deployment from.
21
23
 
22
24
  Finally, you will need to have the following tools installed and ready on your machine:
@@ -24,8 +26,33 @@ Finally, you will need to have the following tools installed and ready on your m
24
26
  - **Node.js** >= 20
25
27
  - **kubectl** - Kubernetes CLI
26
28
  - **Helm** >= 3.0
27
- - **Terraform** >= 1.0 (for infrastructure provisioning)
28
- - Cloud CLI (`aws`, `gcloud`, or `az`) configured for your provider
29
+ - Cloud CLI (`aws`, `gcloud`, or `az`) configured for your provider if you want the wizard to discover clusters or refresh kubeconfig
30
+
31
+ ## Cluster Setup
32
+
33
+ Create or select a Kubernetes cluster before running the CLI wizard. If you need a starting point, use the resources in `cluster-setup/`; they provide minimum compatible AWS, Azure, and GCP setup guidance plus optional access checks. Monitoring destinations are configured later by the CLI wizard and Helm values, not by these cluster setup files.
34
+
35
+ ```bash
36
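+ # AWS: optional access check, then create EKS with CloudFormation
37
+ AWS_REGION=us-east-1 bash cluster-setup/aws/check-aws-access.sh
38
+ aws cloudformation create-stack --stack-name rulebricks-cluster --region us-east-1 --template-body file://cluster-setup/aws/rulebricks-cluster.cfn.yaml --parameters file://cluster-setup/aws/parameters.json --capabilities CAPABILITY_NAMED_IAM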
39
+
40
+ # Azure: optional access check, then deploy AKS with Bicep
41
+ az login
42
+ az account set --subscription <subscription-id>
43
+ AZURE_LOCATION=eastus bash cluster-setup/azure/check-aks-prereqs.sh
44
+ az group create --name rulebricks-rg --location eastus
45
+ az deployment group create \
46
+ --resource-group rulebricks-rg \
47
+ --template-file cluster-setup/azure/rulebricks-cluster.bicep \
48
+ --parameters @cluster-setup/azure/parameters.json
49
+
50
+ # GCP: optional access check, then create GKE with gcloud
51
+ GCP_REGION=us-central1 bash cluster-setup/gcp/check-gke-prereqs.sh
52
+ # Follow cluster-setup/gcp/README.md for the gcloud create commands.
53
+ ```
54
+
55
+ After the cluster exists, update kubeconfig, then run `rulebricks init`. The wizard can also refresh kubeconfig for EKS, GKE, or AKS when provider details are available.
29
56
 
30
57
  ## Quick Start
31
58
 
@@ -33,24 +60,58 @@ Finally, you will need to have the following tools installed and ready on your m
33
60
  # Configuration wizard (generates values.yaml)
34
61
  rulebricks init
35
62
 
36
- # Provision and/or deploy to your cluster
63
+ # Deploy to your cluster
37
64
  rulebricks deploy my-deployment
38
65
  ```
39
66
 
67
+ The generated Helm values pin one Rulebricks product version under
68
+ `global.version`. That single semantic version selects the app, HPS, and HPS
69
+ worker images together.
70
+
40
71
  ## Main Commands
41
72
 
42
- | Command | Description |
43
- | --------------------------- | -------------------------------------- |
44
- | `rulebricks init` | Interactive setup wizard |
45
- | `rulebricks deploy [name]` | Deploy to Kubernetes |
46
- | `rulebricks upgrade [name]` | Upgrade to a new version |
47
- | `rulebricks destroy [name]` | Remove a deployment |
48
- | `rulebricks status [name]` | Show deployment health |
49
- | `rulebricks logs [name]` | Inspect services |
50
- | `rulebricks open [name]` | Open the generated configuration files |
73
+ | Command | Description |
74
+ | --------------------------- | ---------------------------------------- |
75
+ | `rulebricks init` | Interactive setup wizard |
76
+ | `rulebricks deploy [name]` | Deploy to Kubernetes |
77
+ | `rulebricks upgrade [name]` | Upgrade to a new version |
78
+ | `rulebricks destroy [name]` | Remove a deployment |
79
+ | `rulebricks status [name]` | Show deployment health |
80
+ | `rulebricks logs [name]` | Inspect services |
81
+ | `rulebricks open [name]` | Open the generated configuration files |
82
+ | `rulebricks backup [name]` | Run an on-demand database backup |
83
+ | `rulebricks restore [name]` | Restore the database from object storage |
51
84
 
52
85
  Use `rulebricks -h` to explore all commands, and add `-h` to any command to learn more about a particular command's options.
53
86
 
87
+ ## Monitoring
88
+
89
+ Self-hosted deployments enable Prometheus monitoring by default. The wizard only asks whether you want to configure a Prometheus `remote_write` destination; you can skip that step if you do not yet have AWS Managed Prometheus, Azure Monitor managed Prometheus, Grafana Cloud, or another remote-write-compatible backend ready.
90
+
91
+ By default, generated Helm values install `kube-prometheus-stack`, scrape Kubernetes and cluster metrics, and add Rulebricks scrape targets for:
92
+
93
+ - App/admin API health: request counts, latency histograms, coarse rejection counts, and frontend error counts.
94
+ - HPS rule-engine traffic: request counts, latency histograms, coarse rejection counts, Kafka worker wait time, bulk/parallel item volume, and memory cache stats.
95
+ - Supporting infrastructure where available: Kafka JMX, ClickHouse metrics when ClickHouse is enabled, and Traefik's Prometheus endpoint. Traefik's ServiceMonitor remains an explicit opt-in after Prometheus Operator CRDs are installed.
96
+
97
+ Metrics intentionally use low-cardinality labels such as route template, method, status class, operation, and rejection reason. They do not include API keys, users, organizations, IP addresses, raw URLs, rule slugs, flow slugs, or exception messages.
98
+
99
+ Useful PromQL examples:
100
+
101
+ ```promql
102
+ histogram_quantile(0.95, sum(rate(rulebricks_hps_http_request_duration_seconds_bucket[5m])) by (le, route))
103
+ sum(rate(rulebricks_hps_rejections_total[5m])) by (route, reason)
104
+ histogram_quantile(0.95, sum(rate(rulebricks_hps_kafka_request_duration_seconds_bucket[5m])) by (le, operation))
105
+ sum(rate(rulebricks_hps_bulk_items_total[5m])) by (operation)
106
+ sum(rate(rulebricks_app_frontend_errors_total[5m])) by (source)
107
+ ```
108
+
109
+ ## Object Storage and Backups
110
+
111
+ The wizard now collects a shared object storage backend for every deployment. Rulebricks uses separate prefixes in that bucket for decision logs (`decision-logs/`) and self-hosted Supabase database backups (`db-backups/`).
112
+
113
+ Database backups are optional for self-hosted Supabase deployments. When enabled, the Helm chart schedules Barman base backups according to the configured cron schedule and retention window. You can also run `rulebricks backup <name>` to trigger an on-demand backup, or `rulebricks restore <name>` to list backups in object storage and interactively restore one after confirmation.
114
+
54
115
  ## Notes
55
116
 
56
117
  This CLI makes a uniquely wide variety of customization options available (multi-cloud, hybrid vs. self-hosted database deployment, custom email templates, etc.), and not all combinations have been validated.
@@ -0,0 +1,123 @@
1
+ # AWS Cluster Setup
2
+
3
+ A compact, turnkey EKS cluster for Rulebricks. One CloudFormation stack creates
4
+ the cluster **and** the S3 bucket + Amazon Managed Prometheus workspace the
5
+ platform needs, wired to workloads via **EKS Pod Identity** (AWS's recommended
6
+ mechanism for new clusters — no OIDC provider to manage).
7
+
8
+ `eksctl` is not used: it can create a cluster but not the bucket or AMP
9
+ workspace, so the full picture lives in one stack instead.
10
+
11
+ ## Files
12
+
13
+ - `rulebricks-cluster.cfn.yaml` — VPC, EKS cluster + managed node group, EBS CSI + Pod Identity add-ons, one S3 data bucket, AMP workspace, and a single IAM role. (The CLI creates the namespace-scoped Pod Identity associations at deploy time.)
14
+ - `parameters.json` — sample parameter overrides (omit any to use template defaults).
15
+ - `check-aws-access.sh` — verifies identity, service access, IAM role-creation rights, quota, kubectl/helm.
16
+
17
+ ## One role, one bucket
18
+
19
+ A single IAM role, `<cluster>-rulebricks`, is bound to the ServiceAccounts that
20
+ need cloud access via `EKS::PodIdentityAssociation`. All data lives in one
21
+ bucket, `<cluster>-data-<account-id>`, under per-purpose prefixes.
22
+
23
+ | Path | Service account | Permission / target |
24
+ | --------------------------------- | ------------------------------------ | --------------------------------------------------------- |
25
+ | Decision logs (Vector → S3) | `vector` | `s3:*Object`/`ListBucket` → `<cluster>-data/decision-logs/` |
26
+ | DB backups (job → S3) | `rulebricks-<deploymentName>-backup` | `s3:*Object`/`ListBucket` → `<cluster>-data/db-backups/` |
27
+ | Metrics (Prometheus remote write) | `prometheus` | `aps:RemoteWrite` → AMP workspace |
28
+
29
+ The bucket is encrypted and has public access blocked.
30
+
31
+ > **This stack does not need a deployment name.** `EKS::PodIdentityAssociation` is
32
+ > `namespace`-scoped, so the **Rulebricks CLI creates the associations** (vector / backup /
33
+ > prometheus → this role) at `rulebricks deploy` time. The stack only provisions the
34
+ > deployment-independent role, bucket, and AMP workspace, so one cluster can host many deployments.
35
+
36
+ ## Core cluster parameters
37
+
38
+ `ClusterName` (`rulebricks-cluster`), `KubernetesVersion` (`1.34`),
39
+ `NodeInstanceType` (`c7i.xlarge`), `NodeDesiredCapacity`/`NodeMinSize`/`NodeMaxSize`
40
+ (`2`/`2`/`4`), `NodeVolumeSizeGiB` (`50`). The standard (core) nodegroup runs
41
+ the always-on services on two to four 4-vCPU nodes; burst capacity lives in
42
+ the dedicated burst nodegroup below.
43
+
44
+ ### Burst worker nodegroup (default on)
45
+
46
+ `EnableBurstPool` (`"true"`), `BurstInstanceType` (`c7i.4xlarge`, 16 vCPU),
47
+ `BurstNodeMaxSize` (`1`). One large on-demand node that scales 0 -> 1 on
48
+ demand, labeled and tainted `rulebricks.com/pool=burst`: the Rulebricks chart
49
+ makes workers tolerate the taint and softly prefer the label out of the box,
50
+ so the scaled-out worker fleet lands here while core services stay on the
51
+ standard nodegroup. Sizing math: 2 x 4 vCPU core floor + 16 vCPU burst =
52
+ 24 vCPU running steady-state at full burst, and exactly 32 vCPU even with
53
+ the core nodegroup at its 4-node max. Note: EKS has no parked-VM equivalent of AKS
54
+ Deallocate, so each burst cold-provisions the node (~2-3 min); the warm
55
+ worker floor on the core nodes carries traffic during provisioning, and a
56
+ Karpenter NodePool carrying the same label/taint is the planned fast path.
57
+
58
+ > `NodeInstanceType` and the node AMI are coupled: `c7i` is x86, so the template
59
+ > uses `AL2023_x86_64_STANDARD`. If you switch to a Graviton/ARM type (e.g.
60
+ > `c8g`), change `AmiType` to `AL2023_ARM_64_STANDARD` or the nodes won't boot.
61
+
62
+ ## Region
63
+
64
+ CloudFormation is regional — the stack deploys to whatever region your CLI call
65
+ targets. Set it with `--region` (or `AWS_REGION` / your profile), not a
66
+ parameter. Availability zones auto-resolve to that region.
67
+
68
+ ## Check access
69
+
70
+ ```bash
71
+ AWS_REGION=us-east-1 bash check-aws-access.sh
72
+ ```
73
+
74
+ The stack creates named IAM roles, so the deploying principal must be able to
75
+ create roles, and the deploy must pass `--capabilities CAPABILITY_NAMED_IAM`
76
+ (below). The check script flags this.
77
+
78
+ ## Create the cluster
79
+
80
+ ```bash
81
+ aws cloudformation create-stack \
82
+ --stack-name rulebricks-cluster \
83
+ --region us-east-1 \
84
+ --template-body file://rulebricks-cluster.cfn.yaml \
85
+ --parameters file://parameters.json \
86
+ --capabilities CAPABILITY_NAMED_IAM
87
+
88
+ aws cloudformation wait stack-create-complete \
89
+ --stack-name rulebricks-cluster --region us-east-1
90
+
91
+ aws eks update-kubeconfig --name rulebricks-cluster --region us-east-1
92
+ ```
93
+
94
+ `CAPABILITY_NAMED_IAM` is a single inline flag on the deploy call (no
95
+ prerequisite step) and is required because the role has an explicit name. Run
96
+ `rulebricks init` once kubeconfig works, then select this cluster. Stack outputs
97
+ give `DataBucketName`, `RulebricksRoleArn`, and the AMP `remote_write` URL for
98
+ the CLI.
99
+
100
+ ## Delete the cluster
101
+
102
+ Run `rulebricks destroy <deployment-name>` first so Kubernetes removes
103
+ LoadBalancer services and PVC-backed EBS volumes. CloudFormation **cannot delete
104
+ non-empty S3 buckets**, so empty them before deleting the stack:
105
+
106
+ ```bash
107
+ ACCOUNT_ID="$(aws sts get-caller-identity --query Account --output text)"
108
+ aws s3 rm "s3://rulebricks-cluster-data-${ACCOUNT_ID}" --recursive
109
+
110
+ aws cloudformation delete-stack --stack-name rulebricks-cluster --region us-east-1
111
+ aws cloudformation wait stack-delete-complete \
112
+ --stack-name rulebricks-cluster --region us-east-1
113
+ ```
114
+
115
+ The stack is the teardown boundary (analogous to the Azure resource group):
116
+ deleting it removes the cluster, node group, VPC, the IAM role, Pod Identity
117
+ associations, AMP workspace, and the (emptied) bucket.
118
+
119
+ ## Notes
120
+
121
+ - Rulebricks uses a Kubernetes LoadBalancer service; EKS provisions the load balancer and its `80`/`443` security-group rules. In a locked-down VPC, ensure public inbound `80`/`443` can reach it for DNS and cert-manager HTTP-01 validation.
122
+ - Pod Identity requires the `eks-pod-identity-agent` add-on, which the stack installs.
123
+ - To bring your own buckets or AMP workspace, replace the corresponding resources with parameters and references (not enabled by default to keep the stack compact).
@@ -0,0 +1,242 @@
1
+ #!/usr/bin/env bash
2
+ # Rulebricks AWS / EKS prerequisite check.
3
+ #
4
+ # Prints a short pass/fail report and a final READY / NOT READY verdict
5
+ # with the exact actions you need to take before deploying the CloudFormation
6
+ # stack.
7
+ #
8
+ # Env vars:
9
+ # AWS_REGION / AWS_DEFAULT_REGION Region to check (default: us-east-1)
10
+ # AWS_PROFILE Optional named profile to verify
11
+ # VERBOSE=1 Print raw AWS error messages inline
12
+
13
+ set -euo pipefail
14
+
15
+ if [[ -z "${BASH_VERSION:-}" ]]; then
16
+ exec bash "$0" "$@"
17
+ fi
18
+
19
+ export AWS_PAGER=""
20
+
21
+ REGION="${AWS_REGION:-${AWS_DEFAULT_REGION:-us-east-1}}"
22
+ REQUIRED_VCPU=8
23
+ VERBOSE="${VERBOSE:-0}"
24
+
25
+ ACTIONS=()
26
+ BLOCKERS=0
27
+
28
+ # ---------- helpers ----------
29
+
30
+ require_cmd() {
31
+ command -v "$1" >/dev/null 2>&1 || {
32
+ printf "ERROR: required command not found: %s\n" "$1" >&2
33
+ exit 1
34
+ }
35
+ }
36
+
37
+ # Run an aws command. Sets AWS_STDOUT / AWS_STDERR / AWS_RC. Never aborts.
38
+ aws_run() {
39
+ AWS_STDOUT=""; AWS_STDERR=""; AWS_RC=0
40
+ local _err
41
+ _err="$(mktemp)"
42
+ AWS_STDOUT="$(aws "$@" 2>"$_err")" || AWS_RC=$?
43
+ AWS_STDERR="$(cat "$_err")"
44
+ rm -f "$_err"
45
+ if [[ "$VERBOSE" == "1" && -n "$AWS_STDERR" ]]; then
46
+ printf " debug: %s\n" "${AWS_STDERR%%$'\n'*}" >&2
47
+ fi
48
+ return "$AWS_RC"
49
+ }
50
+
51
+ is_auth_error() {
52
+ [[ "$AWS_STDERR" == *"ExpiredToken"* ]] && return 0
53
+ [[ "$AWS_STDERR" == *"InvalidClientTokenId"* ]] && return 0
54
+ [[ "$AWS_STDERR" == *"UnrecognizedClientException"* ]] && return 0
55
+ [[ "$AWS_STDERR" == *"Unable to locate credentials"* ]] && return 0
56
+ [[ "$AWS_STDERR" == *"SignatureDoesNotMatch"* ]] && return 0
57
+ [[ "$AWS_STDERR" == *"TokenRefreshRequired"* ]] && return 0
58
+ [[ "$AWS_STDERR" == *"SSOTokenLoadError"* ]] && return 0
59
+ [[ "$AWS_STDERR" == *"sso login"* ]] && return 0
60
+ return 1
61
+ }
62
+
63
+ row() {
64
+ printf " %-50s %s\n" "$1" "$2"
65
+ }
66
+
67
+ mark_blocker() { BLOCKERS=$((BLOCKERS + 1)); }
68
+ add_action() { ACTIONS+=("$1"); }
69
+
70
+ login_hint() {
71
+ if [[ -n "${AWS_PROFILE:-}" ]]; then
72
+ printf "aws sso login --profile %s (or refresh credentials for profile '%s')" "$AWS_PROFILE" "$AWS_PROFILE"
73
+ else
74
+ printf "aws sso login (or 'aws configure' to set up credentials)"
75
+ fi
76
+ }
77
+
78
+ # ---------- pre-flight ----------
79
+ # Note: eksctl is NOT required. The cluster is deployed via a single
80
+ # CloudFormation stack, so only the AWS CLI plus kubectl/helm are needed.
81
+
82
+ require_cmd aws
83
+ require_cmd kubectl
84
+ require_cmd helm
85
+
86
+ printf "Rulebricks AWS prerequisite check\n"
87
+ printf " Region: %s\n" "$REGION"
88
+ [[ -n "${AWS_PROFILE:-}" ]] && printf " Profile: %s\n" "$AWS_PROFILE"
89
+ printf "\n"
90
+
91
+ # ---------- 1. Authentication ----------
92
+ AUTH_OK=0
93
+ ACCOUNT_ID=""
94
+ CALLER_ARN=""
95
+
96
+ if aws_run sts get-caller-identity --query "Account" --output text; then
97
+ ACCOUNT_ID="$AWS_STDOUT"
98
+ if aws_run sts get-caller-identity --query "Arn" --output text; then
99
+ CALLER_ARN="$AWS_STDOUT"
100
+ fi
101
+ row "AWS credentials valid" "OK ($ACCOUNT_ID)"
102
+ [[ -n "$CALLER_ARN" ]] && row "Caller identity" "$CALLER_ARN"
103
+ AUTH_OK=1
104
+ else
105
+ if is_auth_error; then
106
+ row "AWS credentials valid" "FAIL - credentials missing or expired"
107
+ else
108
+ row "AWS credentials valid" "FAIL - ${AWS_STDERR%%$'\n'*}"
109
+ fi
110
+ add_action "Refresh credentials: $(login_hint)"
111
+ mark_blocker
112
+ fi
113
+
114
+ if [[ $AUTH_OK -eq 0 ]]; then
115
+ printf "\nRemaining checks skipped - fix authentication first.\n"
116
+ printf "\n========================================\n"
117
+ printf "RESULT: NOT READY\n"
118
+ printf "========================================\n"
119
+ printf "Required actions:\n"
120
+ i=1
121
+ for a in "${ACTIONS[@]}"; do
122
+ printf " %d. %s\n" "$i" "$a"
123
+ i=$((i + 1))
124
+ done
125
+ exit 1
126
+ fi
127
+
128
+ # ---------- 2. Service access ----------
129
+ # These cover what the CloudFormation stack touches: EKS, EC2/VPC, IAM (roles +
130
+ # Pod Identity associations), S3 (log/backup buckets), APS (managed Prometheus),
131
+ # and CloudFormation itself.
132
+ declare -a missing_access=()
133
+
134
+ aws_run eks list-clusters --region "$REGION" --output text >/dev/null \
135
+ || missing_access+=("eks:ListClusters")
136
+ aws_run ec2 describe-vpcs --region "$REGION" --max-items 5 --output text >/dev/null \
137
+ || missing_access+=("ec2:DescribeVpcs")
138
+ aws_run iam list-roles --max-items 5 --output text >/dev/null \
139
+ || missing_access+=("iam:ListRoles")
140
+ aws_run s3api list-buckets --output text >/dev/null \
141
+ || missing_access+=("s3:ListAllMyBuckets")
142
+ aws_run aps list-workspaces --region "$REGION" --output text >/dev/null \
143
+ || missing_access+=("aps:ListWorkspaces")
144
+ aws_run cloudformation list-stacks --region "$REGION" --output text >/dev/null \
145
+ || missing_access+=("cloudformation:ListStacks")
146
+
147
+ if [[ ${#missing_access[@]} -eq 0 ]]; then
148
+ row "EKS/EC2/IAM/S3/APS/CFN access" "OK"
149
+ else
150
+ row "EKS/EC2/IAM/S3/APS/CFN access" "WARN - missing: ${missing_access[*]}"
151
+ add_action "Ask your AWS admin to grant the missing IAM actions in $REGION: ${missing_access[*]}"
152
+ fi
153
+
154
+ # ---------- 3. IAM role-creation rights (CAPABILITY_NAMED_IAM) ----------
155
+ # The stack creates named IAM roles, so the deploying principal must be allowed
156
+ # to create roles and attach policies. We can't fully simulate this without
157
+ # iam:SimulatePrincipalPolicy, but we can flag whether the caller is obviously
158
+ # an admin vs. a scoped role so the operator knows to expect a capability prompt.
159
+ if aws_run iam simulate-principal-policy \
160
+ --policy-source-arn "$CALLER_ARN" \
161
+ --action-names iam:CreateRole iam:AttachRolePolicy iam:PutRolePolicy \
162
+ --query "EvaluationResults[?EvalDecision=='allowed'] | length(@)" \
163
+ --output text; then
164
+ allowed="$AWS_STDOUT"
165
+ if [[ "$allowed" == "3" ]]; then
166
+ row "IAM role-creation rights" "OK"
167
+ else
168
+ row "IAM role-creation rights" "WARN - some IAM create/attach actions denied"
169
+ add_action "The stack creates named IAM roles (deploy needs CAPABILITY_NAMED_IAM). Ensure your principal can iam:CreateRole / iam:AttachRolePolicy / iam:PutRolePolicy, or have an admin deploy."
170
+ fi
171
+ else
172
+ # SimulatePrincipalPolicy itself is often denied for non-admins; don't block.
173
+ row "IAM role-creation rights" "WARN - could not simulate (needs iam:SimulatePrincipalPolicy)"
174
+ add_action "Could not verify IAM role-creation rights. The stack creates named IAM roles and must be deployed with --capabilities CAPABILITY_NAMED_IAM by a principal allowed to create roles."
175
+ fi
176
+
177
+ # ---------- 4. EC2 on-demand vCPU quota ----------
178
+ quota_label="EC2 on-demand vCPU quota in $REGION (need ${REQUIRED_VCPU}+)"
179
+ if aws_run service-quotas get-service-quota \
180
+ --service-code ec2 \
181
+ --quota-code L-1216C47A \
182
+ --region "$REGION" \
183
+ --query "Quota.Value" \
184
+ --output text; then
185
+ quota="$AWS_STDOUT"
186
+ if [[ -z "$quota" || "$quota" == "None" ]]; then
187
+ row "$quota_label" "WARN - empty response"
188
+ add_action "Check the EC2 'Running On-Demand Standard vCPUs' quota in the AWS console: Service Quotas → EC2."
189
+ else
190
+ quota_int="${quota%.*}"
191
+ if (( quota_int < REQUIRED_VCPU )); then
192
+ row "$quota_label" "WARN ($quota available)"
193
+ add_action "Request a quota increase: AWS console → Service Quotas → EC2 → 'Running On-Demand Standard vCPUs' in $REGION."
194
+ else
195
+ row "$quota_label" "OK ($quota available)"
196
+ fi
197
+ fi
198
+ else
199
+ row "$quota_label" "WARN - could not read quota"
200
+ add_action "Manually verify EC2 vCPU quota in the AWS console (Service Quotas → EC2) for $REGION."
201
+ fi
202
+
203
+ # ---------- 5. Local tools ----------
204
+ missing_tools=()
205
+ kubectl version --client=true >/dev/null 2>&1 || missing_tools+=("kubectl")
206
+ helm version >/dev/null 2>&1 || missing_tools+=("helm")
207
+
208
+ if [[ ${#missing_tools[@]} -gt 0 ]]; then
209
+ uniq_tools="$(printf '%s\n' "${missing_tools[@]}" | sort -u | tr '\n' ' ')"
210
+ row "Local tools (kubectl, helm)" "FAIL - missing/broken: ${uniq_tools% }"
211
+ add_action "Install/repair: ${uniq_tools% }"
212
+ mark_blocker
213
+ else
214
+ row "Local tools (kubectl, helm)" "OK"
215
+ fi
216
+
217
+ # ---------- summary ----------
218
+ printf "\n========================================\n"
219
+ if [[ $BLOCKERS -eq 0 && ${#ACTIONS[@]} -eq 0 ]]; then
220
+ printf "RESULT: READY - you can deploy the CloudFormation stack.\n"
221
+ printf "========================================\n"
222
+ exit 0
223
+ elif [[ $BLOCKERS -eq 0 ]]; then
224
+ printf "RESULT: READY WITH WARNINGS\n"
225
+ printf "========================================\n"
226
+ printf "The deploy should work, but address these first if possible:\n"
227
+ else
228
+ printf "RESULT: NOT READY\n"
229
+ printf "========================================\n"
230
+ printf "Required actions:\n"
231
+ fi
232
+
233
+ i=1
234
+ for a in "${ACTIONS[@]}"; do
235
+ printf " %d. %s\n" "$i" "$a"
236
+ i=$((i + 1))
237
+ done
238
+
239
+ printf "\nRe-run this script after completing the actions above.\n"
240
+ printf "(Set VERBOSE=1 to see raw AWS error messages.)\n"
241
+
242
+ [[ $BLOCKERS -gt 0 ]] && exit 1 || exit 0
@@ -0,0 +1,13 @@
1
+ [
2
+ { "ParameterKey": "ClusterName", "ParameterValue": "rulebricks-cluster" },
3
+ { "ParameterKey": "KubernetesVersion", "ParameterValue": "1.34" },
4
+ { "ParameterKey": "NodeInstanceType", "ParameterValue": "c7gn.xlarge" },
5
+ { "ParameterKey": "NodeDesiredCapacity", "ParameterValue": "2" },
6
+ { "ParameterKey": "NodeMinSize", "ParameterValue": "2" },
7
+ { "ParameterKey": "NodeMaxSize", "ParameterValue": "4" },
8
+ { "ParameterKey": "EnableBurstPool", "ParameterValue": "true" },
9
+ { "ParameterKey": "BurstInstanceType", "ParameterValue": "c7g.8xlarge" },
10
+ { "ParameterKey": "BurstNodeMaxSize", "ParameterValue": "1" },
11
+ { "ParameterKey": "NodeVolumeSizeGiB", "ParameterValue": "50" },
12
+ { "ParameterKey": "VpcCidr", "ParameterValue": "10.0.0.0/16" }
13
+ ]