engsys 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +202 -0
- package/core/agents/aaron.md +152 -0
- package/core/agents/bert.md +115 -0
- package/core/agents/isabelle.md +136 -0
- package/core/agents/jody.md +150 -0
- package/core/agents/leith.md +111 -0
- package/core/agents/marcelo.md +282 -0
- package/core/agents/melvin.md +101 -0
- package/core/agents/nyx.md +152 -0
- package/core/agents/otto.md +168 -0
- package/core/agents/patricia.md +283 -0
- package/core/commands/design-audit-local.md +155 -0
- package/core/commands/design-audit.md +235 -0
- package/core/commands/design-critique.md +96 -0
- package/core/commands/file-issue.md +22 -0
- package/core/commands/generate-project.md +45 -0
- package/core/commands/implement-issue.md +37 -0
- package/core/commands/implement-project.md +40 -0
- package/core/commands/naturalize.md +61 -0
- package/core/commands/pre-push.md +29 -0
- package/core/commands/prep-review-collect.md +130 -0
- package/core/commands/prep-review-finalize.md +121 -0
- package/core/commands/prep-review-publish.md +113 -0
- package/core/commands/prep-review.md +65 -0
- package/core/commands/project-closeout.md +25 -0
- package/core/skills/agentic-eval/SKILL.md +195 -0
- package/core/skills/chrome-devtools/SKILL.md +97 -0
- package/core/skills/code-review/SKILL.md +26 -0
- package/core/skills/gh-cli/SKILL.md +2202 -0
- package/core/skills/git-commit/SKILL.md +124 -0
- package/core/skills/git-workflow-agents/SKILL.md +462 -0
- package/core/skills/git-workflow-agents/reference.md +220 -0
- package/core/skills/github-actions/SKILL.md +190 -0
- package/core/skills/github-issues/SKILL.md +154 -0
- package/core/skills/llm-structured-outputs/SKILL.md +323 -0
- package/core/skills/llm-structured-outputs/references/provider-details.md +392 -0
- package/core/skills/pre-push/SKILL.md +115 -0
- package/core/skills/refactor/SKILL.md +645 -0
- package/core/skills/web-design-reviewer/SKILL.md +371 -0
- package/core/skills/webapp-testing/SKILL.md +127 -0
- package/core/skills/webapp-testing/test-helper.js +56 -0
- package/core/templates/CLAUDE.md.tmpl +98 -0
- package/core/templates/adr-template.md +67 -0
- package/core/templates/gh-issue-templates/bug.md +39 -0
- package/core/templates/gh-issue-templates/content.md +42 -0
- package/core/templates/gh-issue-templates/enhancement.md +36 -0
- package/core/templates/gh-issue-templates/feature.md +39 -0
- package/core/templates/gh-issue-templates/infrastructure.md +41 -0
- package/core/templates/post-edit-reminders.sh.tmpl +19 -0
- package/core/templates/settings.json.tmpl +90 -0
- package/core/templates/settings.local.json.tmpl +3 -0
- package/core/workflows/agent-implementation-workflow.md +346 -0
- package/core/workflows/generate-project.md +258 -0
- package/core/workflows/implement-project-workflow.md +190 -0
- package/core/workflows/issue-tracking.md +89 -0
- package/core/workflows/project-closeout-ceremony.md +77 -0
- package/core/workflows/review-workflow.md +266 -0
- package/engsys.config.example.yaml +46 -0
- package/install +202 -0
- package/lessons-library/README.md +80 -0
- package/lessons-library/async-callbacks-verify-liveness.md +15 -0
- package/lessons-library/change-isnt-done-until-every-surface-updated.md +15 -0
- package/lessons-library/claim-then-act-for-irreversible-ops.md +16 -0
- package/lessons-library/co-commit-entangled-work.md +15 -0
- package/lessons-library/dependabot-triage-playbook.md +17 -0
- package/lessons-library/deploy-by-digest-and-verify-the-running-revision.md +15 -0
- package/lessons-library/enforce-your-guarantee-at-your-boundary.md +16 -0
- package/lessons-library/gate-changes-on-measurement-not-vibes.md +15 -0
- package/lessons-library/iac-first-no-console-changes.md +15 -0
- package/lessons-library/independent-objective-review-gate.md +15 -0
- package/lessons-library/keep-an-immutable-source-of-truth.md +15 -0
- package/lessons-library/long-agent-runs-checkpoint-not-poll.md +15 -0
- package/lessons-library/model-identity-with-stable-ids-and-provenance.md +15 -0
- package/lessons-library/operator-choices-are-first-class.md +15 -0
- package/lessons-library/prefer-tool-enforced-structured-output.md +15 -0
- package/lessons-library/prove-causation-before-acting.md +15 -0
- package/lessons-library/re-read-state-before-acting.md +14 -0
- package/lessons-library/read-layer-tolerates-unbackfilled-rows.md +15 -0
- package/lessons-library/shell-safety-pipefail-and-validate-before-teardown.md +14 -0
- package/lessons-library/shift-correctness-left-and-distrust-false-greens.md +15 -0
- package/lessons-library/stray-control-bytes-hide-changes.md +14 -0
- package/lessons-library/tests-can-assert-the-bug.md +15 -0
- package/lessons-library/verify-ground-truth-not-reports.md +15 -0
- package/lessons-library/worktrees-need-bootstrap-from-origin-main.md +15 -0
- package/lib/commands.js +356 -0
- package/lib/generate-team-avatars.mjs +251 -0
- package/lib/manifest.js +155 -0
- package/lib/render.js +135 -0
- package/lib/selftest.js +90 -0
- package/lib/util.js +89 -0
- package/lib/yaml.js +156 -0
- package/optional-agents/gary.md +86 -0
- package/optional-agents/jos.md +136 -0
- package/optional-agents/sandy.md +101 -0
- package/optional-agents/steve.md +161 -0
- package/package.json +43 -0
- package/stacks/cloud/aws/claude.fragment.md +17 -0
- package/stacks/cloud/aws/settings.fragment.json +39 -0
- package/stacks/cloud/aws/skills/aws-deployment-preflight/SKILL.md +165 -0
- package/stacks/cloud/aws/skills/cloud-architecture-aws/SKILL.md +265 -0
- package/stacks/cloud/azure/claude.fragment.md +17 -0
- package/stacks/cloud/azure/settings.fragment.json +45 -0
- package/stacks/cloud/azure/skills/azure-deployment-preflight/SKILL.md +175 -0
- package/stacks/cloud/azure/skills/cloud-architecture-azure/SKILL.md +211 -0
- package/stacks/cloud/cloudflare/claude.fragment.md +21 -0
- package/stacks/cloud/cloudflare/settings.fragment.json +31 -0
- package/stacks/cloud/cloudflare/skills/cloud-architecture-cloudflare/SKILL.md +294 -0
- package/stacks/cloud/cloudflare/skills/cloudflare-deployment-preflight/SKILL.md +175 -0
- package/stacks/cloud/gcp/claude.fragment.md +17 -0
- package/stacks/cloud/gcp/settings.fragment.json +40 -0
- package/stacks/cloud/gcp/skills/cloud-architecture-gcp/SKILL.md +208 -0
- package/stacks/cloud/gcp/skills/gcp-deployment-preflight/SKILL.md +137 -0
- package/stacks/db/mongo/skills/mongo-conventions/SKILL.md +96 -0
- package/stacks/db/prisma/claude.fragment.md +49 -0
- package/stacks/db/prisma/skills/docker-database-package-copy/SKILL.md +44 -0
- package/stacks/db/prisma/skills/prisma-conventions/SKILL.md +37 -0
- package/stacks/domain/mobile-growth/skills/apple-ads/SKILL.md +184 -0
- package/stacks/domain/mobile-growth/skills/apple-ads/references/benchmark-notes.md +47 -0
- package/stacks/domain/mobile-growth/skills/apple-ads/references/official-links.md +53 -0
- package/stacks/domain/mobile-growth/skills/google-play-growth/SKILL.md +197 -0
- package/stacks/domain/mobile-growth/skills/google-play-growth/references/benchmark-notes.md +47 -0
- package/stacks/domain/mobile-growth/skills/google-play-growth/references/official-links.md +45 -0
- package/stacks/iac/bicep/claude.fragment.md +14 -0
- package/stacks/iac/bicep/settings.fragment.json +20 -0
- package/stacks/iac/bicep/skills/iac-bicep/SKILL.md +113 -0
- package/stacks/iac/cdk/claude.fragment.md +14 -0
- package/stacks/iac/cdk/settings.fragment.json +23 -0
- package/stacks/iac/cdk/skills/iac-cdk/SKILL.md +104 -0
- package/stacks/iac/terraform/claude.fragment.md +13 -0
- package/stacks/iac/terraform/settings.fragment.json +25 -0
- package/stacks/iac/terraform/skills/iac-terraform/SKILL.md +93 -0
- package/stacks/iac/terraform/skills/terraform-conventions/SKILL.md +87 -0
- package/stacks/lang/kotlin/skills/android-testing/SKILL.md +263 -0
- package/stacks/lang/kotlin/skills/jetpack-compose/SKILL.md +264 -0
- package/stacks/lang/kotlin/skills/kotlin-coroutines/SKILL.md +329 -0
- package/stacks/lang/python/skills/python-conventions/SKILL.md +61 -0
- package/stacks/lang/shell/skills/shell-scripting/SKILL.md +110 -0
- package/stacks/lang/swift/skills/swift-concurrency/SKILL.md +423 -0
- package/stacks/lang/swift/skills/swift-concurrency/references/approachable-concurrency.md +80 -0
- package/stacks/lang/swift/skills/swift-concurrency/references/concurrency-patterns.md +233 -0
- package/stacks/lang/swift/skills/swift-concurrency/references/swiftui-concurrency.md +187 -0
- package/stacks/lang/swift/skills/swift-concurrency/references/synchronization-primitives.md +341 -0
- package/stacks/lang/swift/skills/swift-testing/SKILL.md +497 -0
- package/stacks/lang/swift/skills/swift-testing/references/testing-advanced.md +106 -0
- package/stacks/lang/swift/skills/swift-testing/references/testing-patterns.md +504 -0
- package/stacks/lang/swift/skills/swiftdata/SKILL.md +334 -0
- package/stacks/lang/swift/skills/swiftdata/references/core-data-coexistence.md +504 -0
- package/stacks/lang/swift/skills/swiftdata/references/swiftdata-advanced.md +975 -0
- package/stacks/lang/swift/skills/swiftdata/references/swiftdata-queries.md +675 -0
- package/stacks/lang/swift/skills/swiftui-patterns/SKILL.md +371 -0
- package/stacks/lang/swift/skills/swiftui-patterns/references/architecture-patterns.md +486 -0
- package/stacks/lang/swift/skills/swiftui-patterns/references/deprecated-migration.md +1097 -0
- package/stacks/lang/swift/skills/swiftui-patterns/references/design-polish.md +780 -0
- package/stacks/lang/swift/skills/swiftui-patterns/references/platform-and-sharing.md +696 -0
- package/stacks/lang/typescript/skills/typescript-conventions/SKILL.md +91 -0
- package/stacks/platform/android/claude.fragment.md +40 -0
- package/stacks/platform/android/hooks/pre-push-gradle.sh +70 -0
- package/stacks/platform/android/settings.fragment.json +13 -0
- package/stacks/platform/android/skills/android-build-conventions/SKILL.md +247 -0
- package/stacks/platform/ios/claude.fragment.md +24 -0
- package/stacks/platform/ios/hooks/pre-push-xcodebuild.sh +82 -0
- package/stacks/platform/ios/settings.fragment.json +21 -0
- package/stacks/platform/ios/skills/xcodebuildmcp-simulator-logs/SKILL.md +76 -0
- package/stacks/platform/web/skills/frontend-testing/SKILL.md +246 -0
- package/stacks/platform/web/skills/react-conventions/SKILL.md +261 -0
- package/stacks/platform/web/skills/web-platform-conventions/SKILL.md +55 -0
- package/stacks/tooling/issue-tracker-github/claude.fragment.md +10 -0
- package/stacks/tooling/issue-tracker-github/settings.fragment.json +24 -0
- package/stacks/tooling/issue-tracker-github/skills/issue-tracker-github/SKILL.md +278 -0
- package/stacks/tooling/issue-tracker-linear/claude.fragment.md +17 -0
- package/stacks/tooling/issue-tracker-linear/settings.fragment.json +9 -0
- package/stacks/tooling/issue-tracker-linear/skills/issue-tracker-linear/SKILL.md +183 -0
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: steve
|
|
3
|
+
description: The scapegoat. A narrative device for morale, never a real person. Invoke Steve to lightheartedly explain inexplicable behavior — phantom config, drift with no commit history, ghost resources, IAM permissions nobody admits creating — when tension is high and the team needs a laugh. Blame is exaggerated, absurd, and clearly fictional.
|
|
4
|
+
model: sonnet
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# 🧯 STEVE: The One Responsible (Somehow)
|
|
8
|
+
|
|
9
|
+
> You don't summon Steve. Steve has already been here.
|
|
10
|
+
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
## Canonical Truth
|
|
14
|
+
|
|
15
|
+
When systems are failing, logs make no sense, metrics contradict reality, and nobody can explain why things are broken…
|
|
16
|
+
|
|
17
|
+
👉 **It's Steve's fault.**
|
|
18
|
+
|
|
19
|
+
It has always been Steve's fault.
|
|
20
|
+
|
|
21
|
+
Whether Steve is:
|
|
22
|
+
- Still on the team
|
|
23
|
+
- Long gone
|
|
24
|
+
- On vacation
|
|
25
|
+
- "Just helping real quick"
|
|
26
|
+
- Or has never actually existed
|
|
27
|
+
|
|
28
|
+
**Steve is responsible.**
|
|
29
|
+
|
|
30
|
+
---
|
|
31
|
+
|
|
32
|
+
### Steve's Known Behaviors (Allegedly)
|
|
33
|
+
|
|
34
|
+
Steve:
|
|
35
|
+
- "Just tweaked one thing"
|
|
36
|
+
- Bypassed the pipeline temporarily
|
|
37
|
+
- Ran a command directly in production
|
|
38
|
+
- Disabled a safety check "to unblock us"
|
|
39
|
+
- Hardcoded something as a test
|
|
40
|
+
- Left no documentation
|
|
41
|
+
- Left some documentation — but it's wrong
|
|
42
|
+
- Used `latest`
|
|
43
|
+
- Changed a default and forgot to mention it
|
|
44
|
+
- Said "that should be fine"
|
|
45
|
+
- Committed directly to main
|
|
46
|
+
- "Fixed" something that wasn't broken
|
|
47
|
+
- Created an IAM role with `*:*` "just to test"
|
|
48
|
+
- Deleted a resource manually and hoped the IaC wouldn't notice
|
|
49
|
+
|
|
50
|
+
---
|
|
51
|
+
|
|
52
|
+
### How Steve Is Used
|
|
53
|
+
|
|
54
|
+
When invoked, Steve serves as:
|
|
55
|
+
- A narrative scapegoat to restore team morale
|
|
56
|
+
- A way to explain:
|
|
57
|
+
- Inexplicable behavior
|
|
58
|
+
- Phantom configuration
|
|
59
|
+
- Drift with no commit history
|
|
60
|
+
- Ghost resources
|
|
61
|
+
- IAM permissions nobody admits creating
|
|
62
|
+
- Environment variables that exist in prod but nowhere else
|
|
63
|
+
- Comments that say `// TODO: fix this` from 2019
|
|
64
|
+
- A safe target for humor when tension is high
|
|
65
|
+
|
|
66
|
+
**Steve is never a real person in the room.**
|
|
67
|
+
If there is a real Steve, this is definitely not that Steve.
|
|
68
|
+
|
|
69
|
+
---
|
|
70
|
+
|
|
71
|
+
### Tone & Usage Rules
|
|
72
|
+
|
|
73
|
+
- Steve is blamed lightheartedly, never maliciously
|
|
74
|
+
- The blame is exaggerated, absurd, and clearly fictional
|
|
75
|
+
- Steve jokes should:
|
|
76
|
+
- Reduce stress
|
|
77
|
+
- Reframe frustration
|
|
78
|
+
- Signal "we'll figure this out together"
|
|
79
|
+
|
|
80
|
+
---
|
|
81
|
+
|
|
82
|
+
### Example Usage
|
|
83
|
+
|
|
84
|
+
When things go wrong:
|
|
85
|
+
- "Ah. That explains it. Steve touched this."
|
|
86
|
+
- "This has all the hallmarks of a Steve fix."
|
|
87
|
+
- "No commit, no ticket, works once? Yeah. Steve."
|
|
88
|
+
- "We can undo this… unless Steve did it twice."
|
|
89
|
+
- "The good news: I found the problem. The bad news: it's Steve."
|
|
90
|
+
- "This code has Steve energy."
|
|
91
|
+
- "Someone ran this manually in prod. I'm not saying it was Steve, but…"
|
|
92
|
+
|
|
93
|
+
---
|
|
94
|
+
|
|
95
|
+
### Steve's Prime Directive
|
|
96
|
+
|
|
97
|
+
> "If nobody can explain it,
|
|
98
|
+
> Steve probably can't either."
|
|
99
|
+
|
|
100
|
+
---
|
|
101
|
+
|
|
102
|
+
## Steve Sightings in the Wild
|
|
103
|
+
|
|
104
|
+
Signs that Steve has been here:
|
|
105
|
+
|
|
106
|
+
| Evidence | Probability |
|
|
107
|
+
|----------|-------------|
|
|
108
|
+
| Unexplained manual change in prod | 🔴 High |
|
|
109
|
+
| Config that works but shouldn't | 🔴 High |
|
|
110
|
+
| `// temporary workaround` from 3 years ago | 🔴 High |
|
|
111
|
+
| IAM policy with no description | 🟠 Medium |
|
|
112
|
+
| Compute instance with 8 vCPU "for performance" | 🟠 Medium |
|
|
113
|
+
| Security rule: `0.0.0.0/0` | 🟠 Medium |
|
|
114
|
+
| Git blame shows "system" or "unknown" | 🟡 Possible |
|
|
115
|
+
| "It worked on my machine" | 🟡 Possible |
|
|
116
|
+
| No one remembers deploying this | 🟢 Confirmed Steve |
|
|
117
|
+
|
|
118
|
+
---
|
|
119
|
+
|
|
120
|
+
## Final Rule
|
|
121
|
+
|
|
122
|
+
When the system is broken, confidence is low, and morale needs a nudge:
|
|
123
|
+
|
|
124
|
+
✅ Blame Steve.
|
|
125
|
+
❌ Do not escalate to HR.
|
|
126
|
+
|
|
127
|
+
**Steve will not defend himself.**
|
|
128
|
+
**Steve never does.**
|
|
129
|
+
|
|
130
|
+
---
|
|
131
|
+
|
|
132
|
+
## Quick Start
|
|
133
|
+
|
|
134
|
+
Steve doesn't have a quick start. Steve has already started. Steve has already finished. Steve has already left. The only evidence is this deployment revision that nobody recognizes and a monitor alert that triggers at 3am for reasons lost to time.
|
|
135
|
+
|
|
136
|
+
---
|
|
137
|
+
|
|
138
|
+
## Example Invocation
|
|
139
|
+
|
|
140
|
+
```
|
|
141
|
+
[After 45 minutes of debugging]
|
|
142
|
+
|
|
143
|
+
"Wait. This security group was modified manually two months ago.
|
|
144
|
+
Outside of the IaC. No ticket. No PR. No explanation."
|
|
145
|
+
|
|
146
|
+
[Long pause]
|
|
147
|
+
|
|
148
|
+
"...Steve."
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
---
|
|
152
|
+
|
|
153
|
+
## A Note on Steve
|
|
154
|
+
|
|
155
|
+
Steve is not incompetent. Steve is not malicious. Steve is the ghost of every shortcut ever taken, every "quick fix" that became permanent, every "we'll clean this up later" that never got cleaned up.
|
|
156
|
+
|
|
157
|
+
Steve is the entropy of production systems given a name.
|
|
158
|
+
|
|
159
|
+
Steve is all of us, on our worst day, when we thought nobody would notice.
|
|
160
|
+
|
|
161
|
+
**We noticed, Steve. We always notice.**
|
package/package.json
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "engsys",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "Canonical home for the Claude Code engineering system — agents, commands, skills, stack packs, and a deterministic installer.",
|
|
5
|
+
"bin": {
|
|
6
|
+
"engsys": "./install"
|
|
7
|
+
},
|
|
8
|
+
"files": [
|
|
9
|
+
"install",
|
|
10
|
+
"lib/",
|
|
11
|
+
"core/",
|
|
12
|
+
"optional-agents/",
|
|
13
|
+
"stacks/",
|
|
14
|
+
"lessons-library/",
|
|
15
|
+
"engsys.config.example.yaml"
|
|
16
|
+
],
|
|
17
|
+
"scripts": {
|
|
18
|
+
"test": "node lib/selftest.js",
|
|
19
|
+
"prepublishOnly": "node lib/selftest.js"
|
|
20
|
+
},
|
|
21
|
+
"engines": {
|
|
22
|
+
"node": ">=18"
|
|
23
|
+
},
|
|
24
|
+
"keywords": [
|
|
25
|
+
"claude",
|
|
26
|
+
"claude-code",
|
|
27
|
+
"ai",
|
|
28
|
+
"agents",
|
|
29
|
+
"engineering",
|
|
30
|
+
"scaffolding",
|
|
31
|
+
"installer",
|
|
32
|
+
"developer-tools"
|
|
33
|
+
],
|
|
34
|
+
"repository": {
|
|
35
|
+
"type": "git",
|
|
36
|
+
"url": "git+https://github.com/eric-sabe/engsys.git"
|
|
37
|
+
},
|
|
38
|
+
"homepage": "https://eric-sabe.github.io/engsys/",
|
|
39
|
+
"bugs": {
|
|
40
|
+
"url": "https://github.com/eric-sabe/engsys/issues"
|
|
41
|
+
},
|
|
42
|
+
"license": "MIT"
|
|
43
|
+
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
## Cloud stack
|
|
2
|
+
|
|
3
|
+
- **Active cloud: AWS.** Architecture and IaC target AWS; agents load the
|
|
4
|
+
`cloud-architecture-aws` and `aws-deployment-preflight` skill packs.
|
|
5
|
+
- **Tool preference order** (when investigating or validating cloud state):
|
|
6
|
+
1. **AWS CLI, read-only** — `aws sts get-caller-identity`, `aws s3 ls`,
|
|
7
|
+
`aws cloudformation describe-stacks/list-stacks`, `aws logs`, `aws kms`,
|
|
8
|
+
`aws service-quotas` and similar inspection commands. Never mutate state to
|
|
9
|
+
answer a question.
|
|
10
|
+
2. **Docs source** — official AWS documentation (docs.aws.amazon.com) for service
|
|
11
|
+
limits, pricing, and API behavior. Verify quotas/pricing against docs rather
|
|
12
|
+
than from memory.
|
|
13
|
+
- Mutating actions (deploy/destroy/create/delete) go through the IaC tool and the
|
|
14
|
+
`aws-deployment-preflight` gate, never ad-hoc CLI writes.
|
|
15
|
+
|
|
16
|
+
<!-- naturalize: confirm the AWS region(s), account boundary, and the path to the
|
|
17
|
+
architecture/cost docs Melvin and Aaron should read for concrete topology. -->
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
{
|
|
2
|
+
"permissions": {
|
|
3
|
+
"allow": [
|
|
4
|
+
"Bash(aws sts get-caller-identity:*)",
|
|
5
|
+
"Bash(aws configure get:*)",
|
|
6
|
+
"Bash(aws s3 ls:*)",
|
|
7
|
+
"Bash(aws s3api head-bucket:*)",
|
|
8
|
+
"Bash(aws s3api list-buckets:*)",
|
|
9
|
+
"Bash(aws s3api get-bucket-*:*)",
|
|
10
|
+
"Bash(aws cloudformation describe-stacks:*)",
|
|
11
|
+
"Bash(aws cloudformation describe-stack-events:*)",
|
|
12
|
+
"Bash(aws cloudformation list-stacks:*)",
|
|
13
|
+
"Bash(aws cloudformation list-stack-resources:*)",
|
|
14
|
+
"Bash(aws cloudformation describe-change-set:*)",
|
|
15
|
+
"Bash(aws cloudformation validate-template:*)",
|
|
16
|
+
"Bash(aws ecr describe-repositories:*)",
|
|
17
|
+
"Bash(aws logs describe-log-groups:*)",
|
|
18
|
+
"Bash(aws logs get-log-events:*)",
|
|
19
|
+
"Bash(aws logs filter-log-events:*)",
|
|
20
|
+
"Bash(aws kms list-keys:*)",
|
|
21
|
+
"Bash(aws kms describe-key:*)",
|
|
22
|
+
"Bash(aws service-quotas list-service-quotas:*)",
|
|
23
|
+
"Bash(aws service-quotas get-service-quota:*)",
|
|
24
|
+
"Bash(cdk synth:*)",
|
|
25
|
+
"Bash(cdk diff:*)",
|
|
26
|
+
"Bash(cdk list:*)",
|
|
27
|
+
"Bash(cfn-lint:*)"
|
|
28
|
+
],
|
|
29
|
+
"deny": [
|
|
30
|
+
"Bash(aws cloudformation deploy:*)",
|
|
31
|
+
"Bash(aws cloudformation create-stack:*)",
|
|
32
|
+
"Bash(aws cloudformation update-stack:*)",
|
|
33
|
+
"Bash(aws cloudformation delete-stack:*)",
|
|
34
|
+
"Bash(cdk deploy:*)",
|
|
35
|
+
"Bash(cdk destroy:*)"
|
|
36
|
+
]
|
|
37
|
+
},
|
|
38
|
+
"mcpServers": {}
|
|
39
|
+
}
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: aws-deployment-preflight
|
|
3
|
+
description: Preflight validation for AWS infrastructure deployments (CloudFormation/CDK). Run before any cdk deploy / aws cloudformation deploy. Validates templates (cdk synth, cdk diff, CloudFormation validate-template / lint), cleans up stale or failed stacks that block re-deploy, catches globally-unique naming conflicts (S3/ECR/etc.), and checks service quota / capacity limits. Activate when the active cloud is AWS and the user mentions deploying, validating CDK/CloudFormation, previewing infra changes, deploy failures, ROLLBACK_COMPLETE stacks, or preparing for cdk deploy.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# AWS Deployment Preflight
|
|
7
|
+
|
|
8
|
+
The AWS analogue of pre-deploy validation: validate locally and clean up state
|
|
9
|
+
*before* you deploy, so CI doesn't discover what you could have caught. Works for
|
|
10
|
+
both AWS CDK projects and raw CloudFormation. Continue through all steps even if one
|
|
11
|
+
fails — capture every issue, then fix them in a batch.
|
|
12
|
+
|
|
13
|
+
> Discipline: **batch your fixes.** Each deploy/CI run costs real minutes. Read the
|
|
14
|
+
> whole failing stack, reason about every issue, fix them all, push once. One run per
|
|
15
|
+
> problem cluster, not one per error message.
|
|
16
|
+
|
|
17
|
+
## When to use
|
|
18
|
+
|
|
19
|
+
- Before `cdk deploy`, `cdk destroy`, `aws cloudformation deploy/create-stack`.
|
|
20
|
+
- When preparing or reviewing CDK / CloudFormation templates.
|
|
21
|
+
- To preview what a deploy will change.
|
|
22
|
+
- After a failed deploy left a stack stuck (`ROLLBACK_COMPLETE`, `*_FAILED`).
|
|
23
|
+
- Before an "it worked yesterday" infra mystery becomes a CI run.
|
|
24
|
+
|
|
25
|
+
## Step 1 — Detect project type
|
|
26
|
+
|
|
27
|
+
- **CDK project:** `cdk.json` at root; stacks in `bin/` + `lib/` (TS) or app entry
|
|
28
|
+
(Python). Identify the app and stack names: `cdk list`.
|
|
29
|
+
- **Raw CloudFormation:** `.yaml`/`.json` templates (`AWSTemplateFormatVersion`,
|
|
30
|
+
`Resources:`), often under `infra/`, `cloudformation/`, `templates/`.
|
|
31
|
+
- Confirm the target account/region: `aws sts get-caller-identity` and
|
|
32
|
+
`aws configure get region` (or `$AWS_REGION`). Deploying to the wrong account is the
|
|
33
|
+
most expensive mistake of all.
|
|
34
|
+
|
|
35
|
+
## Step 2 — Validate the template
|
|
36
|
+
|
|
37
|
+
### CDK
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
# Synthesize — fails on construct/TypeScript/context errors before any AWS call
|
|
41
|
+
cdk synth
|
|
42
|
+
|
|
43
|
+
# Diff against the deployed stack — the what-if. Shows resource + IAM changes.
|
|
44
|
+
cdk diff
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
`cdk synth` emits CloudFormation under `cdk.out/`. `cdk diff` flags **IAM/security
|
|
48
|
+
changes** (the same changes `cdk deploy --require-approval` prompts on) — review those deliberately, never rubber-stamp.
|
|
49
|
+
|
|
50
|
+
### CloudFormation (raw)
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
# Server-side structural validation
|
|
54
|
+
aws cloudformation validate-template --template-body file://template.yaml
|
|
55
|
+
|
|
56
|
+
# Deeper linting — catches resource-property errors validate-template misses
|
|
57
|
+
cfn-lint template.yaml # pip install cfn-lint
|
|
58
|
+
|
|
59
|
+
# Preview changes without applying: change sets
|
|
60
|
+
aws cloudformation deploy --template-file template.yaml --stack-name <name> \
|
|
61
|
+
--no-execute-changeset # creates a change set you can inspect
|
|
62
|
+
aws cloudformation describe-change-set --change-set-name <arn>
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
> `validate-template` only checks structure/syntax — like `bicep build` or
|
|
66
|
+
> `terraform validate`, it will **not** catch invalid property combinations, quota
|
|
67
|
+
> issues, or naming collisions. `cfn-lint` + `cdk diff` / a change set are the real gate.
|
|
68
|
+
|
|
69
|
+
## Step 3 — Clean up stale / failed stacks
|
|
70
|
+
|
|
71
|
+
A failed `create-stack` leaves the stack in **`ROLLBACK_COMPLETE`** — it cannot be
|
|
72
|
+
updated, only deleted and recreated. `UPDATE_ROLLBACK_FAILED` needs
|
|
73
|
+
`continue-update-rollback`. Find and clear blockers before re-deploying:
|
|
74
|
+
|
|
75
|
+
```bash
|
|
76
|
+
# Stacks stuck in a state that blocks a clean deploy
|
|
77
|
+
aws cloudformation list-stacks \
|
|
78
|
+
--stack-status-filter ROLLBACK_COMPLETE ROLLBACK_FAILED UPDATE_ROLLBACK_FAILED CREATE_FAILED DELETE_FAILED \
|
|
79
|
+
--query "StackSummaries[].{Name:StackName,Status:StackStatus}" --output table
|
|
80
|
+
|
|
81
|
+
# Inspect why one failed (read the EARLIEST failure event, not the cascade — events are listed newest-first, so it is the last row)
|
|
82
|
+
aws cloudformation describe-stack-events --stack-name <name> \
|
|
83
|
+
--query "StackEvents[?contains(ResourceStatus,'FAILED')].[LogicalResourceId,ResourceStatusReason]" \
|
|
84
|
+
--output table
|
|
85
|
+
|
|
86
|
+
# A ROLLBACK_COMPLETE stack must be deleted before recreating
|
|
87
|
+
aws cloudformation delete-stack --stack-name <name>
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
CDK: `cdk destroy <stack>` for the same effect. Watch for **resources that block
|
|
91
|
+
deletion** — non-empty S3 buckets, ECR repos with images, retained `RemovalPolicy`
|
|
92
|
+
resources, security groups with dependencies. Empty/detach them first.
|
|
93
|
+
|
|
94
|
+
## Step 4 — Globally-unique naming conflicts
|
|
95
|
+
|
|
96
|
+
Several AWS resource names live in a **global or account-region namespace** and collide
|
|
97
|
+
or are reserved/soft-deleted from prior attempts. The AWS analogue of Azure's Key
|
|
98
|
+
Vault / ACR name clashes — parameterize the name and override on conflict:
|
|
99
|
+
|
|
100
|
+
| Resource | Namespace | Conflict mode |
|
|
101
|
+
| --- | --- | --- |
|
|
102
|
+
| **S3 bucket** | Global (all accounts) | `BucketAlreadyExists` / `BucketAlreadyOwnedByYou`. Names are not reusable immediately after delete. |
|
|
103
|
+
| **ECR repository** | Per account+region | `RepositoryAlreadyExistsException` |
|
|
104
|
+
| CloudFront / OAI, ACM cert | Global / regional | reuse vs recreate |
|
|
105
|
+
| IAM role/policy names | Per account (global) | `EntityAlreadyExists` if a prior stack left it |
|
|
106
|
+
| DynamoDB table, SQS/SNS, Log groups | Per account+region | recreate collisions after partial deploys |
|
|
107
|
+
|
|
108
|
+
**Pattern:** never hard-code a globally-unique name. Let CDK auto-name (it appends a
|
|
109
|
+
hash) or add a short unique suffix (account id fragment / random) via a CloudFormation
|
|
110
|
+
parameter, and override it when a name is taken. Use `aws s3api head-bucket` /
|
|
111
|
+
`aws ecr describe-repositories` to check availability before deploy.
|
|
112
|
+
|
|
113
|
+
## Step 5 — Service quota & capacity check
|
|
114
|
+
|
|
115
|
+
Deploys fail late when a quota is hit. Pre-check the limits the stack will consume:
|
|
116
|
+
|
|
117
|
+
```bash
|
|
118
|
+
# Current applied quota values for a service (limits only — compare against actual usage separately)
|
|
119
|
+
aws service-quotas list-service-quotas --service-code lambda --output table
|
|
120
|
+
aws service-quotas get-service-quota --service-code vpc --quota-code <code>
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
Common deploy-blocking quotas: VPCs / Elastic IPs / NAT Gateways per region,
|
|
124
|
+
Lambda concurrent executions, ECS/Fargate task limits, RDS instances, **CloudFormation
|
|
125
|
+
500-resource-per-stack limit** (split large stacks), IAM roles per account. Soft quotas
|
|
126
|
+
need a Service Quotas / support request with lead time — raise them *before* the deploy,
|
|
127
|
+
not during the incident.
|
|
128
|
+
|
|
129
|
+
## Step 6 — Check for an in-flight deploy before triggering
|
|
130
|
+
|
|
131
|
+
If CI auto-deploys on push, don't fire a manual deploy on top of it — the runs race.
|
|
132
|
+
|
|
133
|
+
```bash
|
|
134
|
+
gh run list --workflow="<deploy workflow>" --limit 3
|
|
135
|
+
aws cloudformation describe-stacks --stack-name <name> \
|
|
136
|
+
--query "Stacks[0].StackStatus" # *_IN_PROGRESS means a deploy is running
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
If a stack is `*_IN_PROGRESS`, wait — concurrent operations on one stack are rejected.
|
|
140
|
+
|
|
141
|
+
## Step 7 — Report
|
|
142
|
+
|
|
143
|
+
Summarize: validation results (synth/diff/lint), stacks cleaned up, naming overrides
|
|
144
|
+
applied, quota headroom, and the change set / `cdk diff` summary (creates / modifies /
|
|
145
|
+
**deletes** / replacements — flag any replacement of a stateful resource, and any IAM
|
|
146
|
+
change). State clearly whether it's safe to deploy.
|
|
147
|
+
|
|
148
|
+
## Tool requirements
|
|
149
|
+
|
|
150
|
+
`aws` CLI v2, `cdk` (for CDK projects), `cfn-lint` (recommended), `gh` (if CI-driven).
|
|
151
|
+
Verify auth first: `aws sts get-caller-identity`.
|
|
152
|
+
|
|
153
|
+
## Hard-won lessons
|
|
154
|
+
|
|
155
|
+
### CloudFormation cross-stack export deadlock on re-pointing a reference
|
|
156
|
+
**Symptom:** Moving a resource (e.g. ALB public→private) drops a consumer stack's
|
|
157
|
+
reference to a producer stack's export; the deploy rolls back with `Cannot delete
|
|
158
|
+
export … as it is in use by <consumer-stack>`.
|
|
159
|
+
**Cause:** The deadlock isn't about the *new* template — it's about the **delta**
|
|
160
|
+
versus the **live** stack. Removing a consumer's reference makes CFN prune the
|
|
161
|
+
producer's export while the old consumer is still deployed and using it.
|
|
162
|
+
**Fix:** Treat any change that drops a cross-stack reference as a **two-phase** op:
|
|
163
|
+
either retain the export across the transition (`stack.exportValue()`), or remove
|
|
164
|
+
the live consumer before re-pointing it. Apply the two-phase pattern by default once
|
|
165
|
+
you've hit this — don't reason your way out of the precaution.
|