agentic-team-templates 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +280 -0
- package/bin/cli.js +5 -0
- package/package.json +47 -0
- package/src/index.js +521 -0
- package/templates/_shared/code-quality.md +162 -0
- package/templates/_shared/communication.md +114 -0
- package/templates/_shared/core-principles.md +62 -0
- package/templates/_shared/git-workflow.md +165 -0
- package/templates/_shared/security-fundamentals.md +173 -0
- package/templates/blockchain/.cursorrules/defi-patterns.md +520 -0
- package/templates/blockchain/.cursorrules/gas-optimization.md +339 -0
- package/templates/blockchain/.cursorrules/overview.md +130 -0
- package/templates/blockchain/.cursorrules/security.md +318 -0
- package/templates/blockchain/.cursorrules/smart-contracts.md +364 -0
- package/templates/blockchain/.cursorrules/testing.md +415 -0
- package/templates/blockchain/.cursorrules/web3-integration.md +538 -0
- package/templates/blockchain/CLAUDE.md +389 -0
- package/templates/cli-tools/.cursorrules/architecture.md +412 -0
- package/templates/cli-tools/.cursorrules/arguments.md +406 -0
- package/templates/cli-tools/.cursorrules/distribution.md +546 -0
- package/templates/cli-tools/.cursorrules/error-handling.md +455 -0
- package/templates/cli-tools/.cursorrules/overview.md +136 -0
- package/templates/cli-tools/.cursorrules/testing.md +537 -0
- package/templates/cli-tools/.cursorrules/user-experience.md +545 -0
- package/templates/cli-tools/CLAUDE.md +356 -0
- package/templates/data-engineering/.cursorrules/data-modeling.md +367 -0
- package/templates/data-engineering/.cursorrules/data-quality.md +455 -0
- package/templates/data-engineering/.cursorrules/overview.md +85 -0
- package/templates/data-engineering/.cursorrules/performance.md +339 -0
- package/templates/data-engineering/.cursorrules/pipeline-design.md +280 -0
- package/templates/data-engineering/.cursorrules/security.md +460 -0
- package/templates/data-engineering/.cursorrules/testing.md +452 -0
- package/templates/data-engineering/CLAUDE.md +974 -0
- package/templates/devops-sre/.cursorrules/capacity-planning.md +653 -0
- package/templates/devops-sre/.cursorrules/change-management.md +584 -0
- package/templates/devops-sre/.cursorrules/chaos-engineering.md +651 -0
- package/templates/devops-sre/.cursorrules/disaster-recovery.md +641 -0
- package/templates/devops-sre/.cursorrules/incident-management.md +565 -0
- package/templates/devops-sre/.cursorrules/observability.md +714 -0
- package/templates/devops-sre/.cursorrules/overview.md +230 -0
- package/templates/devops-sre/.cursorrules/postmortems.md +588 -0
- package/templates/devops-sre/.cursorrules/runbooks.md +760 -0
- package/templates/devops-sre/.cursorrules/slo-sli.md +617 -0
- package/templates/devops-sre/.cursorrules/toil-reduction.md +567 -0
- package/templates/devops-sre/CLAUDE.md +1007 -0
- package/templates/documentation/.cursorrules/adr.md +277 -0
- package/templates/documentation/.cursorrules/api-documentation.md +411 -0
- package/templates/documentation/.cursorrules/code-comments.md +253 -0
- package/templates/documentation/.cursorrules/maintenance.md +260 -0
- package/templates/documentation/.cursorrules/overview.md +82 -0
- package/templates/documentation/.cursorrules/readme-standards.md +306 -0
- package/templates/documentation/CLAUDE.md +120 -0
- package/templates/fullstack/.cursorrules/api-contracts.md +331 -0
- package/templates/fullstack/.cursorrules/architecture.md +298 -0
- package/templates/fullstack/.cursorrules/overview.md +109 -0
- package/templates/fullstack/.cursorrules/shared-types.md +348 -0
- package/templates/fullstack/.cursorrules/testing.md +386 -0
- package/templates/fullstack/CLAUDE.md +349 -0
- package/templates/ml-ai/.cursorrules/data-engineering.md +483 -0
- package/templates/ml-ai/.cursorrules/deployment.md +601 -0
- package/templates/ml-ai/.cursorrules/model-development.md +538 -0
- package/templates/ml-ai/.cursorrules/monitoring.md +658 -0
- package/templates/ml-ai/.cursorrules/overview.md +131 -0
- package/templates/ml-ai/.cursorrules/security.md +637 -0
- package/templates/ml-ai/.cursorrules/testing.md +678 -0
- package/templates/ml-ai/CLAUDE.md +1136 -0
- package/templates/mobile/.cursorrules/navigation.md +246 -0
- package/templates/mobile/.cursorrules/offline-first.md +302 -0
- package/templates/mobile/.cursorrules/overview.md +71 -0
- package/templates/mobile/.cursorrules/performance.md +345 -0
- package/templates/mobile/.cursorrules/testing.md +339 -0
- package/templates/mobile/CLAUDE.md +233 -0
- package/templates/platform-engineering/.cursorrules/ci-cd.md +778 -0
- package/templates/platform-engineering/.cursorrules/developer-experience.md +632 -0
- package/templates/platform-engineering/.cursorrules/infrastructure-as-code.md +600 -0
- package/templates/platform-engineering/.cursorrules/kubernetes.md +710 -0
- package/templates/platform-engineering/.cursorrules/observability.md +747 -0
- package/templates/platform-engineering/.cursorrules/overview.md +215 -0
- package/templates/platform-engineering/.cursorrules/security.md +855 -0
- package/templates/platform-engineering/.cursorrules/testing.md +878 -0
- package/templates/platform-engineering/CLAUDE.md +850 -0
- package/templates/utility-agent/.cursorrules/action-control.md +284 -0
- package/templates/utility-agent/.cursorrules/context-management.md +186 -0
- package/templates/utility-agent/.cursorrules/hallucination-prevention.md +253 -0
- package/templates/utility-agent/.cursorrules/overview.md +78 -0
- package/templates/utility-agent/.cursorrules/token-optimization.md +369 -0
- package/templates/utility-agent/CLAUDE.md +513 -0
- package/templates/web-backend/.cursorrules/api-design.md +255 -0
- package/templates/web-backend/.cursorrules/authentication.md +309 -0
- package/templates/web-backend/.cursorrules/database-patterns.md +298 -0
- package/templates/web-backend/.cursorrules/error-handling.md +366 -0
- package/templates/web-backend/.cursorrules/overview.md +69 -0
- package/templates/web-backend/.cursorrules/security.md +358 -0
- package/templates/web-backend/.cursorrules/testing.md +395 -0
- package/templates/web-backend/CLAUDE.md +366 -0
- package/templates/web-frontend/.cursorrules/accessibility.md +296 -0
- package/templates/web-frontend/.cursorrules/component-patterns.md +204 -0
- package/templates/web-frontend/.cursorrules/overview.md +72 -0
- package/templates/web-frontend/.cursorrules/performance.md +325 -0
- package/templates/web-frontend/.cursorrules/state-management.md +227 -0
- package/templates/web-frontend/.cursorrules/styling.md +271 -0
- package/templates/web-frontend/.cursorrules/testing.md +311 -0
- package/templates/web-frontend/CLAUDE.md +399 -0
|
@@ -0,0 +1,584 @@
|
|
|
1
|
+
# Change Management
|
|
2
|
+
|
|
3
|
+
Comprehensive guidelines for safely deploying and managing changes in production.
|
|
4
|
+
|
|
5
|
+
## Core Principles
|
|
6
|
+
|
|
7
|
+
1. **Small Changes** - Smaller changes are easier to review, deploy, and roll back
|
|
8
|
+
2. **Progressive Delivery** - Roll out gradually, validate at each step
|
|
9
|
+
3. **Reversibility** - Every change should be reversible quickly
|
|
10
|
+
4. **Observability** - Know the impact of every change in real-time
|
|
11
|
+
|
|
12
|
+
## Change Categories
|
|
13
|
+
|
|
14
|
+
### Change Risk Levels
|
|
15
|
+
|
|
16
|
+
```yaml
|
|
17
|
+
low_risk:
|
|
18
|
+
description: "Minimal potential for user impact"
|
|
19
|
+
examples:
|
|
20
|
+
- "Documentation updates"
|
|
21
|
+
- "Logging changes"
|
|
22
|
+
- "Non-critical feature flags"
|
|
23
|
+
- "Test environment changes"
|
|
24
|
+
requirements:
|
|
25
|
+
- "Standard PR review"
|
|
26
|
+
- "Automated testing"
|
|
27
|
+
deployment: "Anytime during business hours"
|
|
28
|
+
approval: "Peer review"
|
|
29
|
+
|
|
30
|
+
medium_risk:
|
|
31
|
+
description: "Potential for limited user impact"
|
|
32
|
+
examples:
|
|
33
|
+
- "UI changes"
|
|
34
|
+
- "Non-critical API changes"
|
|
35
|
+
- "New features behind flags"
|
|
36
|
+
- "Performance optimizations"
|
|
37
|
+
requirements:
|
|
38
|
+
- "PR review"
|
|
39
|
+
- "Automated testing"
|
|
40
|
+
- "Staging validation"
|
|
41
|
+
deployment: "Business hours with monitoring"
|
|
42
|
+
approval: "Senior engineer review"
|
|
43
|
+
|
|
44
|
+
high_risk:
|
|
45
|
+
description: "Significant potential for user impact"
|
|
46
|
+
examples:
|
|
47
|
+
- "Database migrations"
|
|
48
|
+
- "Authentication changes"
|
|
49
|
+
- "Payment processing changes"
|
|
50
|
+
- "Critical API changes"
|
|
51
|
+
requirements:
|
|
52
|
+
- "PR review by multiple engineers"
|
|
53
|
+
- "Staging validation"
|
|
54
|
+
- "Rollback plan documented"
|
|
55
|
+
- "On-call aware"
|
|
56
|
+
deployment: "Scheduled change window"
|
|
57
|
+
approval: "Tech lead + on-call"
|
|
58
|
+
|
|
59
|
+
critical_risk:
|
|
60
|
+
description: "Core infrastructure or widespread impact"
|
|
61
|
+
examples:
|
|
62
|
+
- "Database schema changes"
|
|
63
|
+
- "Infrastructure migrations"
|
|
64
|
+
- "Security patches"
|
|
65
|
+
- "Core service refactoring"
|
|
66
|
+
requirements:
|
|
67
|
+
- "Change advisory board review"
|
|
68
|
+
- "Comprehensive testing"
|
|
69
|
+
- "Detailed rollback plan"
|
|
70
|
+
- "Incident response ready"
|
|
71
|
+
deployment: "Scheduled maintenance window"
|
|
72
|
+
approval: "Engineering leadership"
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
## Pre-Deployment Checklist
|
|
76
|
+
|
|
77
|
+
### Standard Deployment Checklist
|
|
78
|
+
|
|
79
|
+
```yaml
|
|
80
|
+
before_deploy:
|
|
81
|
+
code_quality:
|
|
82
|
+
- "PR approved by required reviewers"
|
|
83
|
+
- "All CI checks passing"
|
|
84
|
+
- "No new security vulnerabilities"
|
|
85
|
+
- "Test coverage maintained"
|
|
86
|
+
|
|
87
|
+
preparation:
|
|
88
|
+
- "Rollback plan documented"
|
|
89
|
+
- "Feature flags configured"
|
|
90
|
+
- "Monitoring dashboards ready"
|
|
91
|
+
- "On-call engineer aware (for high-risk)"
|
|
92
|
+
|
|
93
|
+
validation:
|
|
94
|
+
- "Tested in staging/preview"
|
|
95
|
+
- "No unexpected errors in staging"
|
|
96
|
+
- "Performance baseline established"
|
|
97
|
+
|
|
98
|
+
during_deploy:
|
|
99
|
+
monitoring:
|
|
100
|
+
- "Watch error rate dashboard"
|
|
101
|
+
- "Watch latency dashboard"
|
|
102
|
+
- "Watch deployment progress"
|
|
103
|
+
- "Check application logs"
|
|
104
|
+
|
|
105
|
+
checkpoints:
|
|
106
|
+
- "First pod healthy"
|
|
107
|
+
- "10% rollout successful"
|
|
108
|
+
- "50% rollout successful"
|
|
109
|
+
- "100% rollout successful"
|
|
110
|
+
|
|
111
|
+
after_deploy:
|
|
112
|
+
verification:
|
|
113
|
+
- "Smoke test critical paths"
|
|
114
|
+
- "Compare metrics to baseline"
|
|
115
|
+
- "Check for error rate increase"
|
|
116
|
+
- "Verify feature functionality"
|
|
117
|
+
|
|
118
|
+
documentation:
|
|
119
|
+
- "Update deployment log"
|
|
120
|
+
- "Note any issues encountered"
|
|
121
|
+
- "Update runbooks if needed"
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
## Progressive Delivery
|
|
125
|
+
|
|
126
|
+
### Canary Deployments
|
|
127
|
+
|
|
128
|
+
```yaml
|
|
129
|
+
canary_process:
|
|
130
|
+
stage_1_canary:
|
|
131
|
+
traffic: "5%"
|
|
132
|
+
duration: "15 minutes minimum"
|
|
133
|
+
success_criteria:
|
|
134
|
+
- "Error rate ≤ baseline * 1.1"
|
|
135
|
+
- "Latency p99 ≤ baseline * 1.2"
|
|
136
|
+
- "No critical errors in logs"
|
|
137
|
+
failure_action: "Rollback immediately"
|
|
138
|
+
|
|
139
|
+
stage_2_partial:
|
|
140
|
+
traffic: "25%"
|
|
141
|
+
duration: "30 minutes minimum"
|
|
142
|
+
success_criteria:
|
|
143
|
+
- "Error rate ≤ baseline * 1.05"
|
|
144
|
+
- "Latency p99 ≤ baseline * 1.1"
|
|
145
|
+
failure_action: "Rollback to 0%"
|
|
146
|
+
|
|
147
|
+
stage_3_majority:
|
|
148
|
+
traffic: "50%"
|
|
149
|
+
duration: "1 hour minimum"
|
|
150
|
+
success_criteria:
|
|
151
|
+
- "Error rate ≈ baseline"
|
|
152
|
+
- "Latency p99 ≈ baseline"
|
|
153
|
+
failure_action: "Rollback to 25%"
|
|
154
|
+
|
|
155
|
+
stage_4_full:
|
|
156
|
+
traffic: "100%"
|
|
157
|
+
duration: "24 hour bake time"
|
|
158
|
+
success_criteria:
|
|
159
|
+
- "All metrics stable"
|
|
160
|
+
- "No user complaints"
|
|
161
|
+
failure_action: "Rollback to 50%"
|
|
162
|
+
|
|
163
|
+
kubernetes_canary: |
|
|
164
|
+
# Using Argo Rollouts
|
|
165
|
+
apiVersion: argoproj.io/v1alpha1
|
|
166
|
+
kind: Rollout
|
|
167
|
+
metadata:
|
|
168
|
+
name: api-server
|
|
169
|
+
spec:
|
|
170
|
+
replicas: 10
|
|
171
|
+
strategy:
|
|
172
|
+
canary:
|
|
173
|
+
steps:
|
|
174
|
+
- setWeight: 5
|
|
175
|
+
- pause: { duration: 15m }
|
|
176
|
+
- analysis:
|
|
177
|
+
templates:
|
|
178
|
+
- templateName: success-rate
|
|
179
|
+
- setWeight: 25
|
|
180
|
+
- pause: { duration: 30m }
|
|
181
|
+
- setWeight: 50
|
|
182
|
+
- pause: { duration: 1h }
|
|
183
|
+
- setWeight: 100
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
### Feature Flags
|
|
187
|
+
|
|
188
|
+
```yaml
|
|
189
|
+
feature_flag_workflow:
|
|
190
|
+
development:
|
|
191
|
+
- "Create flag in LaunchDarkly/Unleash"
|
|
192
|
+
- "Default: OFF for all environments"
|
|
193
|
+
- "Develop feature behind flag"
|
|
194
|
+
|
|
195
|
+
testing:
|
|
196
|
+
- "Enable flag in development"
|
|
197
|
+
- "Enable flag for QA in staging"
|
|
198
|
+
- "Test feature thoroughly"
|
|
199
|
+
|
|
200
|
+
rollout:
|
|
201
|
+
- "Enable for internal users in production"
|
|
202
|
+
- "Enable for 5% of production users"
|
|
203
|
+
- "Gradually increase percentage"
|
|
204
|
+
- "Monitor metrics at each step"
|
|
205
|
+
|
|
206
|
+
cleanup:
|
|
207
|
+
- "After 100% rollout and stable"
|
|
208
|
+
- "Remove flag checks from code"
|
|
209
|
+
- "Archive or delete flag"
|
|
210
|
+
|
|
211
|
+
flag_types:
|
|
212
|
+
release_flag:
|
|
213
|
+
purpose: "Control feature rollout"
|
|
214
|
+
lifecycle: "Temporary (remove after rollout)"
|
|
215
|
+
example: "enable_new_checkout"
|
|
216
|
+
|
|
217
|
+
ops_flag:
|
|
218
|
+
purpose: "Operational control"
|
|
219
|
+
lifecycle: "Permanent"
|
|
220
|
+
example: "enable_cache_bypass"
|
|
221
|
+
|
|
222
|
+
experiment_flag:
|
|
223
|
+
purpose: "A/B testing"
|
|
224
|
+
lifecycle: "Temporary"
|
|
225
|
+
example: "checkout_flow_variant"
|
|
226
|
+
|
|
227
|
+
permission_flag:
|
|
228
|
+
purpose: "User entitlements"
|
|
229
|
+
lifecycle: "Permanent"
|
|
230
|
+
example: "enable_premium_features"
|
|
231
|
+
```
|
|
232
|
+
|
|
233
|
+
### Blue-Green Deployments
|
|
234
|
+
|
|
235
|
+
```yaml
|
|
236
|
+
blue_green_process:
|
|
237
|
+
preparation:
|
|
238
|
+
- "Blue environment running current version"
|
|
239
|
+
- "Green environment prepared with new version"
|
|
240
|
+
- "Both environments have identical infrastructure"
|
|
241
|
+
|
|
242
|
+
deployment:
|
|
243
|
+
- "Deploy new version to Green"
|
|
244
|
+
- "Run smoke tests on Green"
|
|
245
|
+
- "Verify health checks pass"
|
|
246
|
+
|
|
247
|
+
switch:
|
|
248
|
+
- "Update load balancer to point to Green"
|
|
249
|
+
- "Monitor metrics during switch"
|
|
250
|
+
- "Keep Blue running for immediate rollback"
|
|
251
|
+
|
|
252
|
+
verification:
|
|
253
|
+
- "Verify traffic flowing to Green"
|
|
254
|
+
- "Monitor for errors"
|
|
255
|
+
- "Test critical paths"
|
|
256
|
+
|
|
257
|
+
cleanup:
|
|
258
|
+
- "After bake time (24h), decommission Blue"
|
|
259
|
+
- "Green becomes new Blue for next deployment"
|
|
260
|
+
|
|
261
|
+
benefits:
|
|
262
|
+
- "Zero-downtime deployments"
|
|
263
|
+
- "Instant rollback (switch back to Blue)"
|
|
264
|
+
- "Full testing before traffic switch"
|
|
265
|
+
|
|
266
|
+
drawbacks:
|
|
267
|
+
- "Double infrastructure during deployment"
|
|
268
|
+
- "Database migrations more complex"
|
|
269
|
+
- "Session state management"
|
|
270
|
+
```
|
|
271
|
+
|
|
272
|
+
## Rollback Procedures
|
|
273
|
+
|
|
274
|
+
### Rollback Decision Tree
|
|
275
|
+
|
|
276
|
+
```yaml
|
|
277
|
+
rollback_decision:
|
|
278
|
+
immediate_rollback:
|
|
279
|
+
triggers:
|
|
280
|
+
- "Error rate > 5%"
|
|
281
|
+
- "P99 latency > 3x baseline"
|
|
282
|
+
- "Critical functionality broken"
|
|
283
|
+
- "Data corruption detected"
|
|
284
|
+
action: "Rollback immediately, investigate later"
|
|
285
|
+
|
|
286
|
+
conditional_rollback:
|
|
287
|
+
triggers:
|
|
288
|
+
- "Error rate > 1% for > 5 minutes"
|
|
289
|
+
- "Latency degraded but service functional"
|
|
290
|
+
- "Non-critical feature broken"
|
|
291
|
+
action: "Attempt quick fix (5 min), rollback if unsuccessful"
|
|
292
|
+
|
|
293
|
+
monitor_closely:
|
|
294
|
+
triggers:
|
|
295
|
+
- "Minor error rate increase (< 1%)"
|
|
296
|
+
- "Single user reports"
|
|
297
|
+
- "Logs show errors but metrics OK"
|
|
298
|
+
action: "Monitor for 15 minutes, prepare rollback"
|
|
299
|
+
```
|
|
300
|
+
|
|
301
|
+
### Rollback Commands
|
|
302
|
+
|
|
303
|
+
```yaml
|
|
304
|
+
kubernetes_rollback:
|
|
305
|
+
immediate: |
|
|
306
|
+
# Undo last deployment
|
|
307
|
+
kubectl rollout undo deployment/api-server
|
|
308
|
+
|
|
309
|
+
# Verify rollback
|
|
310
|
+
kubectl rollout status deployment/api-server
|
|
311
|
+
|
|
312
|
+
to_specific_version: |
|
|
313
|
+
# List revision history
|
|
314
|
+
kubectl rollout history deployment/api-server
|
|
315
|
+
|
|
316
|
+
# Rollback to specific revision
|
|
317
|
+
kubectl rollout undo deployment/api-server --to-revision=42
|
|
318
|
+
|
|
319
|
+
verify: |
|
|
320
|
+
# Check deployment status
|
|
321
|
+
kubectl get deployment api-server -o wide
|
|
322
|
+
|
|
323
|
+
# Check pods
|
|
324
|
+
kubectl get pods -l app=api-server
|
|
325
|
+
|
|
326
|
+
# Check events
|
|
327
|
+
kubectl describe deployment api-server | tail -20
|
|
328
|
+
|
|
329
|
+
argocd_rollback:
|
|
330
|
+
immediate: |
|
|
331
|
+
# Rollback to previous version
|
|
332
|
+
argocd app rollback api-server
|
|
333
|
+
|
|
334
|
+
to_specific: |
|
|
335
|
+
# List history
|
|
336
|
+
argocd app history api-server
|
|
337
|
+
|
|
338
|
+
# Sync to specific revision
|
|
339
|
+
argocd app sync api-server --revision <commit-sha>
|
|
340
|
+
|
|
341
|
+
helm_rollback:
|
|
342
|
+
immediate: |
|
|
343
|
+
# List revision history for the release
|
|
344
|
+
helm history api-server
|
|
345
|
+
|
|
346
|
+
# Rollback to previous
|
|
347
|
+
helm rollback api-server
|
|
348
|
+
|
|
349
|
+
to_specific: |
|
|
350
|
+
# Rollback to specific revision
|
|
351
|
+
helm rollback api-server 5
|
|
352
|
+
```
|
|
353
|
+
|
|
354
|
+
### Post-Rollback
|
|
355
|
+
|
|
356
|
+
```yaml
|
|
357
|
+
after_rollback:
|
|
358
|
+
immediate:
|
|
359
|
+
- "Verify service restored"
|
|
360
|
+
- "Update status page"
|
|
361
|
+
- "Notify stakeholders"
|
|
362
|
+
|
|
363
|
+
investigation:
|
|
364
|
+
- "Collect logs from failed deployment"
|
|
365
|
+
- "Identify root cause"
|
|
366
|
+
- "Document what went wrong"
|
|
367
|
+
|
|
368
|
+
remediation:
|
|
369
|
+
- "Fix issue in code"
|
|
370
|
+
- "Add tests to prevent regression"
|
|
371
|
+
- "Update deployment checklist"
|
|
372
|
+
- "Re-deploy with fix"
|
|
373
|
+
```
|
|
374
|
+
|
|
375
|
+
## Database Changes
|
|
376
|
+
|
|
377
|
+
### Safe Migration Practices
|
|
378
|
+
|
|
379
|
+
```yaml
|
|
380
|
+
migration_principles:
|
|
381
|
+
backward_compatible:
|
|
382
|
+
description: "Old code must work with new schema"
|
|
383
|
+
example: "Add column with default, don't remove columns"
|
|
384
|
+
|
|
385
|
+
forward_compatible:
|
|
386
|
+
description: "New code must work with old schema"
|
|
387
|
+
example: "New code handles missing column gracefully"
|
|
388
|
+
|
|
389
|
+
small_steps:
|
|
390
|
+
description: "Break large changes into small steps"
|
|
391
|
+
example: "Rename column: add new, copy data, update code, remove old"
|
|
392
|
+
|
|
393
|
+
safe_operations:
|
|
394
|
+
always_safe:
|
|
395
|
+
- "Add new table"
|
|
396
|
+
- "Add new column with default"
|
|
397
|
+
- "Add new index (CONCURRENTLY in Postgres)"
|
|
398
|
+
- "Create new view"
|
|
399
|
+
|
|
400
|
+
requires_care:
|
|
401
|
+
- "Add NOT NULL column"
|
|
402
|
+
- "Modify column type (compatible widening, e.g. INT to BIGINT)"
|
|
403
|
+
- "Add foreign key"
|
|
404
|
+
|
|
405
|
+
dangerous:
|
|
406
|
+
- "Drop table"
|
|
407
|
+
- "Drop column"
|
|
408
|
+
- "Rename column"
|
|
409
|
+
- "Change column type"
|
|
410
|
+
|
|
411
|
+
expand_contract_pattern:
|
|
412
|
+
phase_1_expand:
|
|
413
|
+
- "Add new column/table"
|
|
414
|
+
- "Deploy code that writes to both old and new"
|
|
415
|
+
- "Migrate existing data"
|
|
416
|
+
|
|
417
|
+
phase_2_migrate:
|
|
418
|
+
- "Deploy code that reads from new"
|
|
419
|
+
- "Verify all data migrated"
|
|
420
|
+
- "Stop writing to old"
|
|
421
|
+
|
|
422
|
+
phase_3_contract:
|
|
423
|
+
- "Remove old column/table"
|
|
424
|
+
- "Deploy code without old references"
|
|
425
|
+
```
|
|
426
|
+
|
|
427
|
+
### Migration Checklist
|
|
428
|
+
|
|
429
|
+
```yaml
|
|
430
|
+
migration_checklist:
|
|
431
|
+
before:
|
|
432
|
+
- "Test migration in staging"
|
|
433
|
+
- "Measure migration duration"
|
|
434
|
+
- "Plan for rollback"
|
|
435
|
+
- "Schedule maintenance window (if needed)"
|
|
436
|
+
- "Backup database"
|
|
437
|
+
|
|
438
|
+
during:
|
|
439
|
+
- "Run migration"
|
|
440
|
+
- "Monitor database performance"
|
|
441
|
+
- "Verify data integrity"
|
|
442
|
+
|
|
443
|
+
after:
|
|
444
|
+
- "Verify application functionality"
|
|
445
|
+
- "Check for deadlocks or slow queries"
|
|
446
|
+
- "Update documentation"
|
|
447
|
+
- "Clean up old data (if applicable)"
|
|
448
|
+
```
|
|
449
|
+
|
|
450
|
+
## Change Windows
|
|
451
|
+
|
|
452
|
+
### Maintenance Windows
|
|
453
|
+
|
|
454
|
+
```yaml
|
|
455
|
+
maintenance_window_types:
|
|
456
|
+
standard:
|
|
457
|
+
schedule: "Tuesday/Wednesday 2-4 AM local"
|
|
458
|
+
changes: "Routine updates, minor changes"
|
|
459
|
+
notification: "24 hours advance"
|
|
460
|
+
|
|
461
|
+
emergency:
|
|
462
|
+
schedule: "As needed"
|
|
463
|
+
changes: "Security patches, critical fixes"
|
|
464
|
+
notification: "As soon as possible"
|
|
465
|
+
|
|
466
|
+
major:
|
|
467
|
+
schedule: "Quarterly, announced 2 weeks ahead"
|
|
468
|
+
changes: "Infrastructure changes, major migrations"
|
|
469
|
+
notification: "2 weeks advance"
|
|
470
|
+
|
|
471
|
+
change_freeze_periods:
|
|
472
|
+
holidays:
|
|
473
|
+
- "Thanksgiving week (US)"
|
|
474
|
+
- "Christmas week"
|
|
475
|
+
- "Major shopping events"
|
|
476
|
+
criteria: "No non-critical changes"
|
|
477
|
+
exceptions: "Security fixes, critical bugs"
|
|
478
|
+
```
|
|
479
|
+
|
|
480
|
+
### Change Advisory Board
|
|
481
|
+
|
|
482
|
+
```yaml
|
|
483
|
+
cab_process:
|
|
484
|
+
purpose: "Review high-risk and critical-risk changes"
|
|
485
|
+
|
|
486
|
+
submission:
|
|
487
|
+
required_info:
|
|
488
|
+
- "Change description"
|
|
489
|
+
- "Risk assessment"
|
|
490
|
+
- "Test results"
|
|
491
|
+
- "Rollback plan"
|
|
492
|
+
- "Timeline"
|
|
493
|
+
|
|
494
|
+
review:
|
|
495
|
+
attendees:
|
|
496
|
+
- "Change requester"
|
|
497
|
+
- "On-call representative"
|
|
498
|
+
- "Security (if applicable)"
|
|
499
|
+
- "Database (if applicable)"
|
|
500
|
+
|
|
501
|
+
decision:
|
|
502
|
+
- "Approve"
|
|
503
|
+
- "Approve with conditions"
|
|
504
|
+
- "Request more information"
|
|
505
|
+
- "Reject"
|
|
506
|
+
|
|
507
|
+
tracking:
|
|
508
|
+
- "All changes logged"
|
|
509
|
+
- "Outcomes recorded"
|
|
510
|
+
- "Post-implementation review"
|
|
511
|
+
```
|
|
512
|
+
|
|
513
|
+
## Configuration Changes
|
|
514
|
+
|
|
515
|
+
### Configuration Management
|
|
516
|
+
|
|
517
|
+
```yaml
|
|
518
|
+
config_change_process:
|
|
519
|
+
version_control:
|
|
520
|
+
- "All config in Git"
|
|
521
|
+
- "PR required for changes"
|
|
522
|
+
- "Review before merge"
|
|
523
|
+
|
|
524
|
+
deployment:
|
|
525
|
+
- "ConfigMaps/Secrets updated"
|
|
526
|
+
- "Rolling restart if needed"
|
|
527
|
+
- "Verify new config active"
|
|
528
|
+
|
|
529
|
+
validation:
|
|
530
|
+
- "Config syntax validated"
|
|
531
|
+
- "Required fields present"
|
|
532
|
+
- "Values within expected ranges"
|
|
533
|
+
|
|
534
|
+
kubernetes_config: |
|
|
535
|
+
# ConfigMap change triggers pod restart
|
|
536
|
+
apiVersion: v1
|
|
537
|
+
kind: ConfigMap
|
|
538
|
+
metadata:
|
|
539
|
+
name: api-config
|
|
540
|
+
annotations:
|
|
541
|
+
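# Stakater Reloader annotation (not a checksum); verify placement: Reloader normally reads this on the workload, with the ConfigMap name as its value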
|
|
542
|
+
configmap.reloader.stakater.com/reload: "true"
|
|
543
|
+
data:
|
|
544
|
+
LOG_LEVEL: "info"
|
|
545
|
+
CACHE_TTL: "300"
|
|
546
|
+
|
|
547
|
+
secrets_rotation:
|
|
548
|
+
process:
|
|
549
|
+
- "Generate new secret"
|
|
550
|
+
- "Add new secret alongside old"
|
|
551
|
+
- "Deploy code using both"
|
|
552
|
+
- "Verify new secret works"
|
|
553
|
+
- "Remove old secret"
|
|
554
|
+
automation: "Use Vault or External Secrets for rotation"
|
|
555
|
+
```
|
|
556
|
+
|
|
557
|
+
## Common Pitfalls
|
|
558
|
+
|
|
559
|
+
```yaml
|
|
560
|
+
pitfall_big_bang_deploy:
|
|
561
|
+
problem: "Deploy everything at once"
|
|
562
|
+
impact: "Hard to identify issues, risky rollback"
|
|
563
|
+
solution: "Progressive delivery, small changes"
|
|
564
|
+
|
|
565
|
+
pitfall_no_rollback_plan:
|
|
566
|
+
problem: "Deploy without knowing how to undo"
|
|
567
|
+
impact: "Stuck with broken deployment"
|
|
568
|
+
solution: "Document and test rollback before deploy"
|
|
569
|
+
|
|
570
|
+
pitfall_deploy_on_friday:
|
|
571
|
+
problem: "Deploy late Friday afternoon"
|
|
572
|
+
impact: "Weekend incident, no support"
|
|
573
|
+
solution: "Deploy early in week, never before time off"
|
|
574
|
+
|
|
575
|
+
pitfall_config_drift:
|
|
576
|
+
problem: "Manual config changes not in version control"
|
|
577
|
+
impact: "Can't reproduce environment, drift"
|
|
578
|
+
solution: "GitOps, all config in version control"
|
|
579
|
+
|
|
580
|
+
pitfall_skip_staging:
|
|
581
|
+
problem: "Deploy directly to production"
|
|
582
|
+
impact: "Find bugs in production"
|
|
583
|
+
solution: "Always validate in staging first"
|
|
584
|
+
```
|