@intentius/chant-lexicon-aws 0.0.22 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/integrity.json +6 -5
- package/dist/manifest.json +1 -1
- package/dist/meta.json +1399 -213
- package/{src/skills/chant-eks.md → dist/skills/chant-aws-eks.md} +2 -2
- package/dist/skills/chant-aws.md +3 -3
- package/dist/types/index.d.ts +1394 -364
- package/package.json +5 -2
- package/src/composites/alb-shared.ts +14 -7
- package/src/composites/composites.test.ts +82 -0
- package/src/composites/fargate-alb.ts +24 -14
- package/src/composites/fargate-service.ts +16 -9
- package/src/composites/lambda-api.ts +7 -3
- package/src/composites/lambda-dynamodb.ts +8 -3
- package/src/composites/lambda-eventbridge.ts +10 -5
- package/src/composites/lambda-function.ts +10 -5
- package/src/composites/lambda-s3.ts +8 -3
- package/src/composites/lambda-sns.ts +13 -7
- package/src/composites/lambda-sqs.ts +11 -5
- package/src/composites/rds-instance.ts +15 -8
- package/src/composites/scheduled-lambda.ts +10 -5
- package/src/composites/vpc-default.ts +81 -30
- package/src/generated/index.d.ts +1394 -364
- package/src/generated/index.ts +114 -22
- package/src/generated/lexicon-aws.json +1399 -213
- package/src/plugin.ts +90 -574
- package/src/skills/chant-aws-eks.md +178 -0
- package/src/skills/chant-aws.md +430 -0
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
---
|
|
2
|
+
skill: chant-aws-eks
|
|
3
|
+
description: EKS end-to-end workflow — provision cluster, configure kubectl, deploy K8s workloads
|
|
4
|
+
user-invocable: true
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# EKS End-to-End Workflow
|
|
8
|
+
|
|
9
|
+
## Overview
|
|
10
|
+
|
|
11
|
+
This skill bridges two lexicons:
|
|
12
|
+
- **`@intentius/chant-lexicon-aws`** — EKS cluster, node groups, IAM roles, OIDC provider (CloudFormation)
|
|
13
|
+
- **`@intentius/chant-lexicon-k8s`** — Kubernetes workloads, IRSA, ALB Ingress, storage, observability (K8s YAML)
|
|
14
|
+
|
|
15
|
+
## Architecture
|
|
16
|
+
|
|
17
|
+
```
|
|
18
|
+
AWS Lexicon (CloudFormation) K8s Lexicon (kubectl apply)
|
|
19
|
+
┌────────────────────────┐ ┌────────────────────────────┐
|
|
20
|
+
│ VPC + Subnets │ │ NamespaceEnv (quotas) │
|
|
21
|
+
│ EKS Cluster │ │ AutoscaledService (app) │
|
|
22
|
+
│ Managed Node Group │──ARNs──→ │ IrsaServiceAccount (IRSA) │
|
|
23
|
+
│ OIDC Provider │ │ AlbIngress (ALB) │
|
|
24
|
+
│ IAM Roles (IRSA) │ │ EbsStorageClass (gp3) │
|
|
25
|
+
│ EKS Add-ons │ │ FluentBitAgent (logs) │
|
|
26
|
+
└────────────────────────┘ │ ExternalDnsAgent (DNS) │
|
|
27
|
+
└────────────────────────────┘
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## Step 1: Provision AWS Infrastructure
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
# Build CloudFormation template
|
|
34
|
+
chant build src/infra/ --output infra.json
|
|
35
|
+
|
|
36
|
+
# See what changed since last deploy (compares current build against last snapshot's digest)
|
|
37
|
+
chant state diff staging aws
|
|
38
|
+
|
|
39
|
+
# Deploy
|
|
40
|
+
aws cloudformation deploy \
|
|
41
|
+
--template-file infra.json \
|
|
42
|
+
--stack-name my-eks-cluster \
|
|
43
|
+
--capabilities CAPABILITY_NAMED_IAM
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
Key AWS resources:
|
|
47
|
+
- **EKS Cluster** — control plane
|
|
48
|
+
- **Managed Node Group** — EC2 worker nodes
|
|
49
|
+
- **OIDC Provider** — enables IRSA (IAM Roles for Service Accounts)
|
|
50
|
+
- **IAM Roles** — node role, app IRSA roles, ALB controller role
|
|
51
|
+
|
|
52
|
+
## Step 2: Configure kubectl
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
aws eks update-kubeconfig --name my-cluster --region us-east-1
|
|
56
|
+
kubectl get nodes # verify connectivity
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
## Step 3: Deploy K8s Workloads
|
|
60
|
+
|
|
61
|
+
```bash
|
|
62
|
+
# Build K8s manifests
|
|
63
|
+
chant build src/k8s/ --output manifests.yaml
|
|
64
|
+
|
|
65
|
+
# Apply
|
|
66
|
+
kubectl apply -f manifests.yaml
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
### Key K8s composites for EKS
|
|
70
|
+
|
|
71
|
+
```typescript
|
|
72
|
+
import {
|
|
73
|
+
NamespaceEnv,
|
|
74
|
+
AutoscaledService,
|
|
75
|
+
IrsaServiceAccount,
|
|
76
|
+
AlbIngress,
|
|
77
|
+
EbsStorageClass,
|
|
78
|
+
FluentBitAgent,
|
|
79
|
+
ExternalDnsAgent,
|
|
80
|
+
} from "@intentius/chant-lexicon-k8s";
|
|
81
|
+
|
|
82
|
+
// 1. Namespace with quotas and network isolation
|
|
83
|
+
const ns = NamespaceEnv({
|
|
84
|
+
name: "prod",
|
|
85
|
+
cpuQuota: "16",
|
|
86
|
+
memoryQuota: "32Gi",
|
|
87
|
+
defaultCpuRequest: "100m",
|
|
88
|
+
defaultMemoryRequest: "128Mi",
|
|
89
|
+
defaultDenyIngress: true,
|
|
90
|
+
});
|
|
91
|
+
|
|
92
|
+
// 2. IRSA ServiceAccount (use IAM Role ARN from CloudFormation outputs)
|
|
93
|
+
const irsa = IrsaServiceAccount({
|
|
94
|
+
name: "app-sa",
|
|
95
|
+
iamRoleArn: "arn:aws:iam::123456789012:role/app-role", // from CF output
|
|
96
|
+
namespace: "prod",
|
|
97
|
+
});
|
|
98
|
+
|
|
99
|
+
// 3. Application with autoscaling
|
|
100
|
+
const app = AutoscaledService({
|
|
101
|
+
name: "api",
|
|
102
|
+
image: "api:1.0",
|
|
103
|
+
port: 8080,
|
|
104
|
+
maxReplicas: 10,
|
|
105
|
+
cpuRequest: "200m",
|
|
106
|
+
memoryRequest: "256Mi",
|
|
107
|
+
namespace: "prod",
|
|
108
|
+
});
|
|
109
|
+
|
|
110
|
+
// 4. ALB Ingress (use ACM cert ARN from CloudFormation outputs)
|
|
111
|
+
const ingress = AlbIngress({
|
|
112
|
+
name: "api-ingress",
|
|
113
|
+
hosts: [{ hostname: "api.example.com", paths: [{ path: "/", serviceName: "api", servicePort: 80 }] }],
|
|
114
|
+
certificateArn: "arn:aws:acm:us-east-1:123456789012:certificate/abc", // from CF output
|
|
115
|
+
namespace: "prod",
|
|
116
|
+
});
|
|
117
|
+
|
|
118
|
+
// 5. Storage
|
|
119
|
+
const storage = EbsStorageClass({ name: "gp3-encrypted", type: "gp3", encrypted: true });
|
|
120
|
+
|
|
121
|
+
// 6. Observability
|
|
122
|
+
const logging = FluentBitAgent({
|
|
123
|
+
logGroup: "/aws/eks/my-cluster/containers",
|
|
124
|
+
region: "us-east-1",
|
|
125
|
+
clusterName: "my-cluster",
|
|
126
|
+
});
|
|
127
|
+
|
|
128
|
+
// 7. DNS
|
|
129
|
+
const dns = ExternalDnsAgent({
|
|
130
|
+
iamRoleArn: "arn:aws:iam::123456789012:role/external-dns-role",
|
|
131
|
+
domainFilters: ["example.com"],
|
|
132
|
+
});
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
## Step 4: Verify
|
|
136
|
+
|
|
137
|
+
```bash
|
|
138
|
+
kubectl get pods -n prod
|
|
139
|
+
kubectl get ingress -n prod
|
|
140
|
+
kubectl logs -n amazon-cloudwatch -l app.kubernetes.io/name=fluent-bit
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
## Cleanup
|
|
144
|
+
|
|
145
|
+
```bash
|
|
146
|
+
# Delete K8s workloads first
|
|
147
|
+
kubectl delete -f manifests.yaml
|
|
148
|
+
|
|
149
|
+
# Then delete AWS infrastructure
|
|
150
|
+
aws cloudformation delete-stack --stack-name my-eks-cluster
|
|
151
|
+
aws cloudformation wait stack-delete-complete --stack-name my-eks-cluster
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
## Cross-Lexicon Value Flow
|
|
155
|
+
|
|
156
|
+
CloudFormation outputs flow into K8s composite props:
|
|
157
|
+
|
|
158
|
+
| CloudFormation Output | K8s Composite Prop |
|
|
159
|
+
|----------------------|-------------------|
|
|
160
|
+
| App IAM Role ARN | `IrsaServiceAccount.iamRoleArn` |
|
|
161
|
+
| ALB Controller Role ARN | `IrsaServiceAccount.iamRoleArn` (for ALB controller SA) |
|
|
162
|
+
| ACM Certificate ARN | `AlbIngress.certificateArn` |
|
|
163
|
+
| ExternalDNS Role ARN | `ExternalDnsAgent.iamRoleArn` |
|
|
164
|
+
| EKS Cluster Name | `FluentBitAgent.clusterName`, `AdotCollector.clusterName` |
|
|
165
|
+
| EFS Filesystem ID | `EfsStorageClass.fileSystemId` |
|
|
166
|
+
|
|
167
|
+
## EKS Init Template
|
|
168
|
+
|
|
169
|
+
Scaffold a dual-lexicon EKS project:
|
|
170
|
+
|
|
171
|
+
```bash
|
|
172
|
+
chant init --lexicon aws --template eks
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
This creates:
|
|
176
|
+
- `src/infra/` — EKS cluster, node group, IAM (AWS lexicon)
|
|
177
|
+
- `src/k8s/` — namespace, app, ingress, storage (K8s lexicon)
|
|
178
|
+
- `package.json` with both `@intentius/chant-lexicon-aws` and `@intentius/chant-lexicon-k8s`
|
|
@@ -0,0 +1,430 @@
|
|
|
1
|
+
---
|
|
2
|
+
skill: chant-aws
|
|
3
|
+
description: Build, validate, and deploy CloudFormation templates from a chant project
|
|
4
|
+
user-invocable: true
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# AWS CloudFormation Operational Playbook
|
|
8
|
+
|
|
9
|
+
## How chant and CloudFormation relate
|
|
10
|
+
|
|
11
|
+
chant is a **synthesis compiler** — it compiles TypeScript source files into CloudFormation JSON (or YAML). `chant build` does not call AWS APIs; synthesis is pure and deterministic. The optional `chant state snapshot` command queries AWS APIs to capture deployment metadata (physical IDs, status, outputs) for observability. Your job as an agent is to bridge synthesis and deployment:
|
|
12
|
+
|
|
13
|
+
- Use **chant** for: build, lint, diff (local template comparison)
|
|
14
|
+
- Use **AWS CLI** for: validate-template, deploy, change sets, rollback, drift detection, and all stack operations
|
|
15
|
+
|
|
16
|
+
The source of truth for infrastructure is the TypeScript in `src/`. The generated template (`stack.json`) is an intermediate artifact.
|
|
17
|
+
|
|
18
|
+
## Build and validate
|
|
19
|
+
|
|
20
|
+
### Build the template
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
chant build src/ --output stack.json
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
Options:
|
|
27
|
+
- `--format yaml` — emit YAML instead of JSON
|
|
28
|
+
- `--watch` — rebuild on source changes
|
|
29
|
+
|
|
30
|
+
### Lint the source
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
chant lint src/
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
Options:
|
|
37
|
+
- `--fix` — auto-fix violations where possible
|
|
38
|
+
- `--format sarif` — SARIF output for CI integration
|
|
39
|
+
- `--watch` — re-lint on changes
|
|
40
|
+
|
|
41
|
+
### Validate with CloudFormation
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
aws cloudformation validate-template --template-body file://stack.json
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
### What each step catches
|
|
48
|
+
|
|
49
|
+
| Step | Catches | When to run |
|
|
50
|
+
|------|---------|-------------|
|
|
51
|
+
| `chant lint` | Best-practice violations, security anti-patterns, naming issues | Every edit |
|
|
52
|
+
| `chant build` | TypeScript errors, missing properties, type mismatches | Before deploy |
|
|
53
|
+
| `validate-template` | CloudFormation schema errors, invalid intrinsic usage | Before deploy |
|
|
54
|
+
|
|
55
|
+
Always run all three before deploying. Lint catches things validate-template cannot (and vice versa).
|
|
56
|
+
|
|
57
|
+
## Diffing and change preview
|
|
58
|
+
|
|
59
|
+
This is the most critical section for production safety. **Never deploy to production without previewing changes.**
|
|
60
|
+
|
|
61
|
+
### Local diff
|
|
62
|
+
|
|
63
|
+
Compare your proposed template against what is currently deployed:
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
# Get the currently deployed template
|
|
67
|
+
aws cloudformation get-template --stack-name <stack-name> --query TemplateBody --output json > deployed.json
|
|
68
|
+
|
|
69
|
+
# Build the proposed template
|
|
70
|
+
chant build src/ --output proposed.json
|
|
71
|
+
|
|
72
|
+
# Diff them
|
|
73
|
+
diff deployed.json proposed.json
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
### Change sets (recommended for production)
|
|
77
|
+
|
|
78
|
+
Change sets let you preview exactly what CloudFormation will do before it does it.
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
# 1. Create the change set (note the generated name — substitute it for review-<id> in the steps below)
|
|
82
|
+
aws cloudformation create-change-set \
|
|
83
|
+
--stack-name <stack-name> \
|
|
84
|
+
--template-body file://stack.json \
|
|
85
|
+
--change-set-name review-$(date +%s) \
|
|
86
|
+
--capabilities CAPABILITY_NAMED_IAM
|
|
87
|
+
|
|
88
|
+
# 2. Wait for it to compute
|
|
89
|
+
aws cloudformation wait change-set-create-complete \
|
|
90
|
+
--stack-name <stack-name> \
|
|
91
|
+
--change-set-name review-<id>
|
|
92
|
+
|
|
93
|
+
# 3. Review the changes
|
|
94
|
+
aws cloudformation describe-change-set \
|
|
95
|
+
--stack-name <stack-name> \
|
|
96
|
+
--change-set-name review-<id>
|
|
97
|
+
|
|
98
|
+
# 4a. Execute if changes look safe
|
|
99
|
+
aws cloudformation execute-change-set \
|
|
100
|
+
--stack-name <stack-name> \
|
|
101
|
+
--change-set-name review-<id>
|
|
102
|
+
|
|
103
|
+
# 4b. Or delete if you want to abort
|
|
104
|
+
aws cloudformation delete-change-set \
|
|
105
|
+
--stack-name <stack-name> \
|
|
106
|
+
--change-set-name review-<id>
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
### Interpreting change set results
|
|
110
|
+
|
|
111
|
+
Each resource change has an **Action** and a **Replacement** value. Read them together:
|
|
112
|
+
|
|
113
|
+
| Action | Replacement | Risk | Meaning |
|
|
114
|
+
|--------|-------------|------|---------|
|
|
115
|
+
| Add | — | Low | New resource will be created |
|
|
116
|
+
| Modify | False | Low | In-place update, no disruption |
|
|
117
|
+
| Modify | Conditional | **MEDIUM** | May replace depending on property — investigate further |
|
|
118
|
+
| Modify | True | **HIGH** | Resource will be DESTROYED and recreated — **data loss risk** |
|
|
119
|
+
| Remove | — | **HIGH** | Resource will be deleted |
|
|
120
|
+
|
|
121
|
+
### Properties that always cause replacement
|
|
122
|
+
|
|
123
|
+
These property changes ALWAYS destroy and recreate the resource:
|
|
124
|
+
- `BucketName` on S3 buckets
|
|
125
|
+
- `TableName` on DynamoDB tables
|
|
126
|
+
- `DBInstanceIdentifier` on RDS instances
|
|
127
|
+
- `FunctionName` on Lambda functions
|
|
128
|
+
- `CidrBlock` on VPCs and subnets
|
|
129
|
+
- `ClusterIdentifier` on Redshift clusters
|
|
130
|
+
- `DomainName` on Elasticsearch/OpenSearch domains
|
|
131
|
+
- `TopicName` on SNS topics
|
|
132
|
+
- `QueueName` on SQS queues
|
|
133
|
+
|
|
134
|
+
**CRITICAL**: When you see `Replacement: True` on any stateful resource (databases, S3 buckets, queues with messages, DynamoDB tables), ALWAYS flag this to the user and get explicit confirmation before executing. This will destroy the existing resource and all its data.
|
|
135
|
+
|
|
136
|
+
## Deploying a new stack
|
|
137
|
+
|
|
138
|
+
```bash
|
|
139
|
+
aws cloudformation deploy \
|
|
140
|
+
--template-file stack.json \
|
|
141
|
+
--stack-name <stack-name> \
|
|
142
|
+
--capabilities CAPABILITY_NAMED_IAM \
|
|
143
|
+
--parameter-overrides Env=prod Version=1.0 \
|
|
144
|
+
--tags Project=myapp Environment=prod
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
### Capabilities
|
|
148
|
+
|
|
149
|
+
| Capability | When needed |
|
|
150
|
+
|------------|-------------|
|
|
151
|
+
| `CAPABILITY_IAM` | Template creates IAM resources with auto-generated names |
|
|
152
|
+
| `CAPABILITY_NAMED_IAM` | Template creates IAM resources with custom names (use this by default — it's a superset) |
|
|
153
|
+
| `CAPABILITY_AUTO_EXPAND` | Template uses macros or nested stacks with transforms |
|
|
154
|
+
|
|
155
|
+
**Recommendation**: Default to `CAPABILITY_NAMED_IAM` unless the template also uses macros, in which case use `--capabilities CAPABILITY_NAMED_IAM CAPABILITY_AUTO_EXPAND`.
|
|
156
|
+
|
|
157
|
+
### Monitoring deployment
|
|
158
|
+
|
|
159
|
+
```bash
|
|
160
|
+
# Wait for completion (blocks until done)
|
|
161
|
+
aws cloudformation wait stack-create-complete --stack-name <stack-name>
|
|
162
|
+
|
|
163
|
+
# Or poll events in real-time
|
|
164
|
+
watch -n 5 "aws cloudformation describe-stack-events --stack-name <stack-name> --max-items 10 --query 'StackEvents[].{Time:Timestamp,Resource:LogicalResourceId,Status:ResourceStatus,Reason:ResourceStatusReason}' --output table"
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
### Getting outputs
|
|
168
|
+
|
|
169
|
+
```bash
|
|
170
|
+
aws cloudformation describe-stacks \
|
|
171
|
+
--stack-name <stack-name> \
|
|
172
|
+
--query 'Stacks[0].Outputs'
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
## Updating an existing stack
|
|
176
|
+
|
|
177
|
+
### Safe path — change set workflow (production / stateful stacks)
|
|
178
|
+
|
|
179
|
+
1. Build: `chant build src/ --output stack.json`
|
|
180
|
+
2. Create change set (see Diffing section above)
|
|
181
|
+
3. Review every resource change — pay special attention to Replacement values
|
|
182
|
+
4. Get user confirmation for any destructive changes
|
|
183
|
+
5. Execute the change set
|
|
184
|
+
6. Monitor: `aws cloudformation wait stack-update-complete --stack-name <stack-name>`
|
|
185
|
+
|
|
186
|
+
### Fast path — direct deploy (dev / stateless stacks)
|
|
187
|
+
|
|
188
|
+
```bash
|
|
189
|
+
aws cloudformation deploy \
|
|
190
|
+
--template-file stack.json \
|
|
191
|
+
--stack-name <stack-name> \
|
|
192
|
+
--capabilities CAPABILITY_NAMED_IAM \
|
|
193
|
+
--no-fail-on-empty-changeset
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
The `--no-fail-on-empty-changeset` flag prevents a non-zero exit code when there are no changes (useful in CI).
|
|
197
|
+
|
|
198
|
+
### Updating parameters only (no template change)
|
|
199
|
+
|
|
200
|
+
```bash
|
|
201
|
+
aws cloudformation update-stack \
|
|
202
|
+
--stack-name <stack-name> \
|
|
203
|
+
--use-previous-template \
|
|
204
|
+
--capabilities CAPABILITY_NAMED_IAM \
|
|
205
|
+
--parameters ParameterKey=Env,ParameterValue=staging
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
### Which path to use
|
|
209
|
+
|
|
210
|
+
| Scenario | Path |
|
|
211
|
+
|----------|------|
|
|
212
|
+
| Production stack with databases/storage | Safe path (change set) |
|
|
213
|
+
| Any stack with `Replacement: True` changes | Safe path (change set) |
|
|
214
|
+
| Dev/test stack, stateless resources only | Fast path (direct deploy) |
|
|
215
|
+
| CI/CD automated pipeline with approval gate | Safe path (change set with manual approval) |
|
|
216
|
+
| Parameter-only change, no template diff | Fast path with `--use-previous-template` |
|
|
217
|
+
|
|
218
|
+
## Rollback and recovery
|
|
219
|
+
|
|
220
|
+
### Stack states reference
|
|
221
|
+
|
|
222
|
+
| State | Meaning | Action |
|
|
223
|
+
|-------|---------|--------|
|
|
224
|
+
| `CREATE_COMPLETE` | Stack created successfully | None — healthy |
|
|
225
|
+
| `UPDATE_COMPLETE` | Update succeeded | None — healthy |
|
|
226
|
+
| `DELETE_COMPLETE` | Stack deleted | Gone — recreate if needed |
|
|
227
|
+
| `CREATE_IN_PROGRESS` | Creation underway | Wait |
|
|
228
|
+
| `UPDATE_IN_PROGRESS` | Update underway | Wait |
|
|
229
|
+
| `DELETE_IN_PROGRESS` | Deletion underway | Wait |
|
|
230
|
+
| `ROLLBACK_IN_PROGRESS` | Create failed, rolling back | Wait |
|
|
231
|
+
| `UPDATE_ROLLBACK_IN_PROGRESS` | Update failed, rolling back | Wait |
|
|
232
|
+
| `CREATE_FAILED` | Creation failed (rare) | Check events, delete stack |
|
|
233
|
+
| `ROLLBACK_COMPLETE` | Create failed, rollback finished | **Must delete and recreate** — cannot update |
|
|
234
|
+
| `ROLLBACK_FAILED` | Create rollback failed | Check events, may need manual cleanup |
|
|
235
|
+
| `UPDATE_ROLLBACK_COMPLETE` | Update failed, rolled back to previous | Healthy — fix template and try again |
|
|
236
|
+
| `UPDATE_ROLLBACK_FAILED` | Update rollback itself failed | **See recovery steps below** |
|
|
237
|
+
| `DELETE_FAILED` | Deletion failed | Check events, retry or use retain |
|
|
238
|
+
|
|
239
|
+
### Recovering from UPDATE_ROLLBACK_FAILED
|
|
240
|
+
|
|
241
|
+
This is the most common "stuck" state. A resource that CloudFormation tried to roll back could not be restored.
|
|
242
|
+
|
|
243
|
+
**Step 1**: Identify the stuck resource:
|
|
244
|
+
|
|
245
|
+
```bash
|
|
246
|
+
aws cloudformation describe-stack-events \
|
|
247
|
+
--stack-name <stack-name> \
|
|
248
|
+
--query "StackEvents[?ResourceStatus=='UPDATE_FAILED'].[LogicalResourceId,ResourceStatusReason]" \
|
|
249
|
+
--output table
|
|
250
|
+
```
|
|
251
|
+
|
|
252
|
+
**Step 2a** — Try continuing the rollback:
|
|
253
|
+
|
|
254
|
+
```bash
|
|
255
|
+
aws cloudformation continue-update-rollback --stack-name <stack-name>
|
|
256
|
+
aws cloudformation wait stack-rollback-complete --stack-name <stack-name>
|
|
257
|
+
```
|
|
258
|
+
|
|
259
|
+
**Step 2b** — If that fails, skip the stuck resources:
|
|
260
|
+
|
|
261
|
+
```bash
|
|
262
|
+
aws cloudformation continue-update-rollback \
|
|
263
|
+
--stack-name <stack-name> \
|
|
264
|
+
--resources-to-skip LogicalResourceId1 LogicalResourceId2
|
|
265
|
+
```
|
|
266
|
+
|
|
267
|
+
**WARNING**: Skipping resources causes state divergence — CloudFormation's view of the stack will no longer match reality. You may need to manually clean up skipped resources or import them back later.
|
|
268
|
+
|
|
269
|
+
### Recovering from ROLLBACK_COMPLETE
|
|
270
|
+
|
|
271
|
+
A stack in `ROLLBACK_COMPLETE` cannot be updated. You must delete it and create a new one:
|
|
272
|
+
|
|
273
|
+
```bash
|
|
274
|
+
aws cloudformation delete-stack --stack-name <stack-name>
|
|
275
|
+
aws cloudformation wait stack-delete-complete --stack-name <stack-name>
|
|
276
|
+
# Then deploy fresh
|
|
277
|
+
aws cloudformation deploy --template-file stack.json --stack-name <stack-name> --capabilities CAPABILITY_NAMED_IAM
|
|
278
|
+
```
|
|
279
|
+
|
|
280
|
+
## Stack lifecycle operations
|
|
281
|
+
|
|
282
|
+
### Delete a stack
|
|
283
|
+
|
|
284
|
+
```bash
|
|
285
|
+
aws cloudformation delete-stack --stack-name <stack-name>
|
|
286
|
+
aws cloudformation wait stack-delete-complete --stack-name <stack-name>
|
|
287
|
+
```
|
|
288
|
+
|
|
289
|
+
If deletion fails because a resource cannot be deleted (e.g., non-empty S3 bucket), use retain:
|
|
290
|
+
|
|
291
|
+
```bash
|
|
292
|
+
aws cloudformation delete-stack \
|
|
293
|
+
--stack-name <stack-name> \
|
|
294
|
+
--retain-resources BucketLogicalId
|
|
295
|
+
```
|
|
296
|
+
|
|
297
|
+
To protect a stack from accidental deletion:
|
|
298
|
+
|
|
299
|
+
```bash
|
|
300
|
+
aws cloudformation update-termination-protection \
|
|
301
|
+
--enable-termination-protection \
|
|
302
|
+
--stack-name <stack-name>
|
|
303
|
+
```
|
|
304
|
+
|
|
305
|
+
### Drift detection
|
|
306
|
+
|
|
307
|
+
Detect whether resources have been modified outside of CloudFormation:
|
|
308
|
+
|
|
309
|
+
```bash
|
|
310
|
+
# Start detection
|
|
311
|
+
DRIFT_ID=$(aws cloudformation detect-stack-drift --stack-name <stack-name> --query StackDriftDetectionId --output text)
|
|
312
|
+
|
|
313
|
+
# Check status
|
|
314
|
+
aws cloudformation describe-stack-drift-detection-status --stack-drift-detection-id $DRIFT_ID
|
|
315
|
+
|
|
316
|
+
# View drifted resources
|
|
317
|
+
aws cloudformation describe-stack-resource-drifts \
|
|
318
|
+
--stack-name <stack-name> \
|
|
319
|
+
--stack-resource-drift-status-filters MODIFIED DELETED
|
|
320
|
+
```
|
|
321
|
+
|
|
322
|
+
### Import existing resources
|
|
323
|
+
|
|
324
|
+
Bring resources that were created outside CloudFormation under stack management:
|
|
325
|
+
|
|
326
|
+
```bash
|
|
327
|
+
aws cloudformation create-change-set \
|
|
328
|
+
--stack-name <stack-name> \
|
|
329
|
+
--template-body file://stack.json \
|
|
330
|
+
--change-set-name import-resources \
|
|
331
|
+
--change-set-type IMPORT \
|
|
332
|
+
--resources-to-import '[{"ResourceType":"AWS::S3::Bucket","LogicalResourceId":"MyBucket","ResourceIdentifier":{"BucketName":"existing-bucket-name"}}]'
|
|
333
|
+
```
|
|
334
|
+
|
|
335
|
+
## Troubleshooting decision tree
|
|
336
|
+
|
|
337
|
+
When a deployment fails, follow this diagnostic flow:
|
|
338
|
+
|
|
339
|
+
### Step 1: Check the stack status
|
|
340
|
+
|
|
341
|
+
```bash
|
|
342
|
+
aws cloudformation describe-stacks --stack-name <stack-name> --query 'Stacks[0].StackStatus' --output text
|
|
343
|
+
```
|
|
344
|
+
|
|
345
|
+
### Step 2: Branch on status
|
|
346
|
+
|
|
347
|
+
- **`*_IN_PROGRESS`** — Wait. Do not take action while an operation is in progress.
|
|
348
|
+
- **`*_FAILED` or `*_ROLLBACK_*`** — Read the events (Step 3).
|
|
349
|
+
- **`*_COMPLETE`** (excluding the rollback states covered by the previous bullet) — Stack is stable. If behavior is wrong, check resource configuration.
|
|
350
|
+
|
|
351
|
+
### Step 3: Read the failure events
|
|
352
|
+
|
|
353
|
+
```bash
|
|
354
|
+
aws cloudformation describe-stack-events \
|
|
355
|
+
--stack-name <stack-name> \
|
|
356
|
+
--query "StackEvents[?contains(ResourceStatus, 'FAILED')].[LogicalResourceId,ResourceStatusReason]" \
|
|
357
|
+
--output table
|
|
358
|
+
```
|
|
359
|
+
|
|
360
|
+
### Step 4: Diagnose by error pattern
|
|
361
|
+
|
|
362
|
+
| Error pattern | Likely cause | Fix |
|
|
363
|
+
|---------------|-------------|-----|
|
|
364
|
+
| "already exists" | Resource name collision — another stack or manual creation owns this name | Use dynamic names: `` Sub`${AWS.StackName}-myresource` `` |
|
|
365
|
+
| "not authorized" or "AccessDenied" | Missing IAM permissions, SCP restriction, or wrong `--capabilities` | Check IAM policy, add `--capabilities CAPABILITY_NAMED_IAM` |
|
|
366
|
+
| "limit exceeded" or "LimitExceededException" | AWS service quota hit | Request quota increase or reduce resource count |
|
|
367
|
+
| "Template error" or "Template format error" | Invalid template syntax | Run `aws cloudformation validate-template` and `chant lint src/` |
|
|
368
|
+
| "Circular dependency" | Two resources reference each other | Break the cycle — extract one reference to an output or parameter |
|
|
369
|
+
| "is in UPDATE_ROLLBACK_FAILED state and can not be updated" | Stuck rollback | See UPDATE_ROLLBACK_FAILED recovery above |
|
|
370
|
+
| "is in ROLLBACK_COMPLETE state and can not be updated" | Failed creation, rolled back | Delete the stack and recreate |
|
|
371
|
+
| "No updates are to be performed" | Template unchanged | Use `--no-fail-on-empty-changeset` or verify your changes are in the built template |
|
|
372
|
+
| "Resource is not in the state" | Resource was modified outside CF | Run drift detection, then update or import |
|
|
373
|
+
| "Maximum number of addresses has been reached" | EIP limit (default 5) | Request EIP quota increase |
|
|
374
|
+
|
|
375
|
+
## Quick reference
|
|
376
|
+
|
|
377
|
+
### Stack info commands
|
|
378
|
+
|
|
379
|
+
```bash
|
|
380
|
+
# List stacks that were created or updated successfully (CREATE_COMPLETE / UPDATE_COMPLETE)
|
|
381
|
+
aws cloudformation list-stacks --stack-status-filter CREATE_COMPLETE UPDATE_COMPLETE
|
|
382
|
+
|
|
383
|
+
# Describe a stack (status, params, outputs, tags)
|
|
384
|
+
aws cloudformation describe-stacks --stack-name <stack-name>
|
|
385
|
+
|
|
386
|
+
# List resources in a stack
|
|
387
|
+
aws cloudformation list-stack-resources --stack-name <stack-name>
|
|
388
|
+
|
|
389
|
+
# Get outputs only
|
|
390
|
+
aws cloudformation describe-stacks --stack-name <stack-name> --query 'Stacks[0].Outputs'
|
|
391
|
+
|
|
392
|
+
# Recent events
|
|
393
|
+
aws cloudformation describe-stack-events --stack-name <stack-name> --max-items 20
|
|
394
|
+
|
|
395
|
+
# Get deployed template
|
|
396
|
+
aws cloudformation get-template --stack-name <stack-name> --query TemplateBody
|
|
397
|
+
```
|
|
398
|
+
|
|
399
|
+
### Full build-to-deploy pipeline
|
|
400
|
+
|
|
401
|
+
```bash
|
|
402
|
+
# 1. Lint
|
|
403
|
+
chant lint src/
|
|
404
|
+
|
|
405
|
+
# 2. Build
|
|
406
|
+
chant build src/ --output stack.json
|
|
407
|
+
|
|
408
|
+
# 3. Validate
|
|
409
|
+
aws cloudformation validate-template --template-body file://stack.json
|
|
410
|
+
|
|
411
|
+
# 4. Create change set (note the generated name — substitute it for deploy-<id> in the steps below)
|
|
412
|
+
aws cloudformation create-change-set \
|
|
413
|
+
--stack-name <stack-name> \
|
|
414
|
+
--template-body file://stack.json \
|
|
415
|
+
--change-set-name deploy-$(date +%s) \
|
|
416
|
+
--capabilities CAPABILITY_NAMED_IAM
|
|
417
|
+
|
|
418
|
+
# 5. Review changes
|
|
419
|
+
aws cloudformation describe-change-set \
|
|
420
|
+
--stack-name <stack-name> \
|
|
421
|
+
--change-set-name deploy-<id>
|
|
422
|
+
|
|
423
|
+
# 6. Execute (after user confirms)
|
|
424
|
+
aws cloudformation execute-change-set \
|
|
425
|
+
--stack-name <stack-name> \
|
|
426
|
+
--change-set-name deploy-<id>
|
|
427
|
+
|
|
428
|
+
# 7. Wait for completion
|
|
429
|
+
aws cloudformation wait stack-update-complete --stack-name <stack-name>
|
|
430
|
+
```
|