runbooks 0.7.6__py3-none-any.whl → 0.7.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. runbooks/__init__.py +1 -1
  2. runbooks/base.py +5 -1
  3. runbooks/cfat/__init__.py +8 -4
  4. runbooks/cfat/assessment/collectors.py +171 -14
  5. runbooks/cfat/assessment/compliance.py +871 -0
  6. runbooks/cfat/assessment/runner.py +122 -11
  7. runbooks/cfat/models.py +6 -2
  8. runbooks/common/logger.py +14 -0
  9. runbooks/common/rich_utils.py +451 -0
  10. runbooks/enterprise/__init__.py +68 -0
  11. runbooks/enterprise/error_handling.py +411 -0
  12. runbooks/enterprise/logging.py +439 -0
  13. runbooks/enterprise/multi_tenant.py +583 -0
  14. runbooks/finops/README.md +468 -241
  15. runbooks/finops/__init__.py +39 -3
  16. runbooks/finops/cli.py +83 -18
  17. runbooks/finops/cross_validation.py +375 -0
  18. runbooks/finops/dashboard_runner.py +812 -164
  19. runbooks/finops/enhanced_dashboard_runner.py +525 -0
  20. runbooks/finops/finops_dashboard.py +1892 -0
  21. runbooks/finops/helpers.py +485 -51
  22. runbooks/finops/optimizer.py +823 -0
  23. runbooks/finops/tests/__init__.py +19 -0
  24. runbooks/finops/tests/results_test_finops_dashboard.xml +1 -0
  25. runbooks/finops/tests/run_comprehensive_tests.py +421 -0
  26. runbooks/finops/tests/run_tests.py +305 -0
  27. runbooks/finops/tests/test_finops_dashboard.py +705 -0
  28. runbooks/finops/tests/test_integration.py +477 -0
  29. runbooks/finops/tests/test_performance.py +380 -0
  30. runbooks/finops/tests/test_performance_benchmarks.py +500 -0
  31. runbooks/finops/tests/test_reference_images_validation.py +867 -0
  32. runbooks/finops/tests/test_single_account_features.py +715 -0
  33. runbooks/finops/tests/validate_test_suite.py +220 -0
  34. runbooks/finops/types.py +1 -1
  35. runbooks/hitl/enhanced_workflow_engine.py +725 -0
  36. runbooks/inventory/artifacts/scale-optimize-status.txt +12 -0
  37. runbooks/inventory/collectors/aws_comprehensive.py +442 -0
  38. runbooks/inventory/collectors/enterprise_scale.py +281 -0
  39. runbooks/inventory/core/collector.py +172 -13
  40. runbooks/inventory/discovery.md +1 -1
  41. runbooks/inventory/list_ec2_instances.py +18 -20
  42. runbooks/inventory/list_ssm_parameters.py +31 -3
  43. runbooks/inventory/organizations_discovery.py +1269 -0
  44. runbooks/inventory/rich_inventory_display.py +393 -0
  45. runbooks/inventory/run_on_multi_accounts.py +35 -19
  46. runbooks/inventory/runbooks.security.report_generator.log +0 -0
  47. runbooks/inventory/runbooks.security.run_script.log +0 -0
  48. runbooks/inventory/vpc_flow_analyzer.py +1030 -0
  49. runbooks/main.py +2215 -119
  50. runbooks/metrics/dora_metrics_engine.py +599 -0
  51. runbooks/operate/__init__.py +2 -2
  52. runbooks/operate/base.py +122 -10
  53. runbooks/operate/deployment_framework.py +1032 -0
  54. runbooks/operate/deployment_validator.py +853 -0
  55. runbooks/operate/dynamodb_operations.py +10 -6
  56. runbooks/operate/ec2_operations.py +319 -11
  57. runbooks/operate/executive_dashboard.py +779 -0
  58. runbooks/operate/mcp_integration.py +750 -0
  59. runbooks/operate/nat_gateway_operations.py +1120 -0
  60. runbooks/operate/networking_cost_heatmap.py +685 -0
  61. runbooks/operate/privatelink_operations.py +940 -0
  62. runbooks/operate/s3_operations.py +10 -6
  63. runbooks/operate/vpc_endpoints.py +644 -0
  64. runbooks/operate/vpc_operations.py +1038 -0
  65. runbooks/remediation/__init__.py +2 -2
  66. runbooks/remediation/acm_remediation.py +1 -1
  67. runbooks/remediation/base.py +1 -1
  68. runbooks/remediation/cloudtrail_remediation.py +1 -1
  69. runbooks/remediation/cognito_remediation.py +1 -1
  70. runbooks/remediation/dynamodb_remediation.py +1 -1
  71. runbooks/remediation/ec2_remediation.py +1 -1
  72. runbooks/remediation/ec2_unattached_ebs_volumes.py +1 -1
  73. runbooks/remediation/kms_enable_key_rotation.py +1 -1
  74. runbooks/remediation/kms_remediation.py +1 -1
  75. runbooks/remediation/lambda_remediation.py +1 -1
  76. runbooks/remediation/multi_account.py +1 -1
  77. runbooks/remediation/rds_remediation.py +1 -1
  78. runbooks/remediation/s3_block_public_access.py +1 -1
  79. runbooks/remediation/s3_enable_access_logging.py +1 -1
  80. runbooks/remediation/s3_encryption.py +1 -1
  81. runbooks/remediation/s3_remediation.py +1 -1
  82. runbooks/remediation/vpc_remediation.py +475 -0
  83. runbooks/security/__init__.py +3 -1
  84. runbooks/security/compliance_automation.py +632 -0
  85. runbooks/security/report_generator.py +10 -0
  86. runbooks/security/run_script.py +31 -5
  87. runbooks/security/security_baseline_tester.py +169 -30
  88. runbooks/security/security_export.py +477 -0
  89. runbooks/validation/__init__.py +10 -0
  90. runbooks/validation/benchmark.py +484 -0
  91. runbooks/validation/cli.py +356 -0
  92. runbooks/validation/mcp_validator.py +768 -0
  93. runbooks/vpc/__init__.py +38 -0
  94. runbooks/vpc/config.py +212 -0
  95. runbooks/vpc/cost_engine.py +347 -0
  96. runbooks/vpc/heatmap_engine.py +605 -0
  97. runbooks/vpc/manager_interface.py +634 -0
  98. runbooks/vpc/networking_wrapper.py +1260 -0
  99. runbooks/vpc/rich_formatters.py +679 -0
  100. runbooks/vpc/tests/__init__.py +5 -0
  101. runbooks/vpc/tests/conftest.py +356 -0
  102. runbooks/vpc/tests/test_cli_integration.py +530 -0
  103. runbooks/vpc/tests/test_config.py +458 -0
  104. runbooks/vpc/tests/test_cost_engine.py +479 -0
  105. runbooks/vpc/tests/test_networking_wrapper.py +512 -0
  106. {runbooks-0.7.6.dist-info → runbooks-0.7.9.dist-info}/METADATA +40 -12
  107. {runbooks-0.7.6.dist-info → runbooks-0.7.9.dist-info}/RECORD +111 -50
  108. {runbooks-0.7.6.dist-info → runbooks-0.7.9.dist-info}/WHEEL +0 -0
  109. {runbooks-0.7.6.dist-info → runbooks-0.7.9.dist-info}/entry_points.txt +0 -0
  110. {runbooks-0.7.6.dist-info → runbooks-0.7.9.dist-info}/licenses/LICENSE +0 -0
  111. {runbooks-0.7.6.dist-info → runbooks-0.7.9.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1032 @@
1
+ """
2
+ Production Deployment Framework for AWS Networking Cost Optimization
3
+ Terminal 5: Deploy Agent - Enterprise Security-as-Code Implementation
4
+
5
+ Comprehensive production deployment framework with enterprise-grade safety controls,
6
+ monitoring, alerting, and rollback procedures for AWS networking cost optimization.
7
+
8
+ Features:
9
+ - Default DRY-RUN mode for all operations
10
+ - Management approval gates for cost impact >$1000
11
+ - Comprehensive rollback procedures with automated recovery
12
+ - Zero-downtime deployment approach with canary strategy
13
+ - Real-time monitoring with alerting on execution failures
14
+ - MCP server integration for production validation
15
+ - Executive dashboard deployment with ROI tracking
16
+
17
+ Production Safety Requirements:
18
+ - All destructive operations default to dry-run mode
19
+ - Cost impact validation with approval workflows
20
+ - Automated rollback on performance degradation
21
+ - Comprehensive audit trails and compliance tracking
22
+ - Multi-profile AWS integration with proper RBAC
23
+ """
24
+
25
+ import asyncio
26
+ import json
27
+ import time
28
+ from concurrent.futures import ThreadPoolExecutor
29
+ from dataclasses import dataclass, field
30
+ from datetime import datetime, timedelta
31
+ from enum import Enum
32
+ from pathlib import Path
33
+ from typing import Any, Callable, Dict, List, Optional, Union
34
+
35
+ import boto3
36
+ from botocore.exceptions import ClientError
37
+ from loguru import logger
38
+
39
+ from runbooks.common.rich_utils import RichConsole
40
+ from runbooks.operate.base import BaseOperation, OperationContext, OperationResult, OperationStatus
41
+ from runbooks.operate.vpc_operations import VPCOperations
42
+
43
+
44
class DeploymentStrategy(Enum):
    """Rollout strategies that a deployment plan can select.

    Each member's value is its lowercase snake_case name.
    """

    BLUE_GREEN = "blue_green"
    CANARY = "canary"
    ROLLING = "rolling"
    ALL_AT_ONCE = "all_at_once"
51
+
52
+
53
class ApprovalStatus(Enum):
    """States an approval request for a production operation can be in.

    Each member's value is its lowercase name.
    """

    PENDING = "pending"
    APPROVED = "approved"
    REJECTED = "rejected"
    EXPIRED = "expired"
60
+
61
+
62
class MonitoringAlert(Enum):
    """Severity levels for monitoring alerts, declared from most to least severe.

    Each member's value is its lowercase name.
    """

    CRITICAL = "critical"
    HIGH = "high"
    MEDIUM = "medium"
    LOW = "low"
    INFO = "info"
70
+
71
+
72
@dataclass
class ApprovalRequest:
    """Request for sign-off on a production operation, with its business context.

    When ``expires_at`` is not supplied it is set to 24 hours after
    ``created_at`` once the instance has been constructed.
    """

    # What is being requested and for which resource
    request_id: str
    operation_type: str
    resource_id: str

    # Cost figures attached to the request
    cost_impact_monthly: float
    cost_impact_annual: float

    # Free-text context supplied by the requestor
    business_justification: str
    risk_assessment: str
    requestor: str

    # Lifecycle: naive UTC creation time, optional explicit expiry, current state
    created_at: datetime = field(default_factory=datetime.utcnow)
    expires_at: Optional[datetime] = None
    status: ApprovalStatus = ApprovalStatus.PENDING

    # Filled in once a decision has been recorded
    approver: Optional[str] = None
    approval_notes: Optional[str] = None

    def __post_init__(self):
        """Apply the default 24-hour approval window when no expiry was given."""
        if self.expires_at is not None:
            return
        self.expires_at = self.created_at + timedelta(hours=24)
94
+
95
+
96
@dataclass
class DeploymentPlan:
    """Configuration of a deployment: targets, operations, safety switches and limits."""

    # Identity, rollout strategy and targets
    deployment_id: str
    strategy: DeploymentStrategy
    target_accounts: List[str]
    target_regions: List[str]
    operations: List[Dict[str, Any]]

    # Safety switches, all on by default
    approval_required: bool = True
    dry_run_first: bool = True
    rollback_enabled: bool = True
    monitoring_enabled: bool = True

    # Monthly cost (USD) above which approval is required
    cost_threshold: float = 1000.0

    # Health limits used during monitoring
    error_rate_threshold: float = 0.05  # fraction of errors (5%)
    latency_threshold: float = 12.0  # seconds
    availability_threshold: float = 0.995  # minimum availability (99.5%)

    # Phase durations, in seconds
    canary_duration: int = 300  # 5 minutes
    rollout_duration: int = 1800  # 30 minutes
    monitoring_duration: int = 3600  # 1 hour
120
+
121
+
122
@dataclass
class DeploymentStatus:
    """Mutable progress record for a single deployment."""

    # Which deployment this is, the phase it is in, and when it started
    deployment_id: str
    current_phase: str
    started_at: datetime

    # Unset until the deployment finishes
    completed_at: Optional[datetime] = None

    # Progress counters
    progress_percentage: float = 0.0
    successful_operations: int = 0
    failed_operations: int = 0

    # Rollback bookkeeping
    rollback_triggered: bool = False
    rollback_reason: Optional[str] = None

    # Performance figures
    avg_execution_time: float = 0.0
    error_rate: float = 0.0
    availability_score: float = 1.0
140
+
141
+
142
+ class ProductionDeploymentFramework(BaseOperation):
143
+ """
144
+ Enterprise Production Deployment Framework
145
+
146
+ Terminal 5: Deploy Agent implementation with comprehensive safety controls,
147
+ monitoring, rollback procedures, and compliance tracking for AWS networking
148
+ cost optimization campaigns.
149
+
150
+ Core Features:
151
+ - Multi-stage deployment with approval gates
152
+ - Real-time performance monitoring and alerting
153
+ - Automated rollback on performance degradation
154
+ - Comprehensive audit trails and compliance tracking
155
+ - MCP server integration for validation
156
+ - Executive dashboard and ROI tracking
157
+ """
158
+
159
+ service_name = "deployment-framework"
160
+ supported_operations = {
161
+ "deploy_optimization_campaign",
162
+ "validate_deployment_plan",
163
+ "execute_canary_deployment",
164
+ "monitor_deployment_health",
165
+ "trigger_rollback",
166
+ "generate_deployment_report",
167
+ "setup_monitoring_alerts",
168
+ "create_approval_request",
169
+ "process_approval_workflow",
170
+ }
171
+ requires_confirmation = True
172
+
173
def __init__(self, profile: Optional[str] = None, region: Optional[str] = None, dry_run: bool = True):
    """
    Set up the framework: console, VPC operations helper, defaults and tracking state.

    Creates the ``artifacts/deployments`` directory (relative to the current
    working directory) if it does not already exist.

    Args:
        profile: AWS profile for authentication
        region: AWS region for operations
        dry_run: Enable dry-run mode (on by default)
    """
    super().__init__(profile, region, dry_run)
    self.rich_console = RichConsole()
    self.vpc_operations = VPCOperations(profile, region, dry_run)

    # Safety defaults: dry-run flag, approval window (hours), monthly cost gate (USD)
    self.default_dry_run = True
    self.approval_timeout_hours = 24
    self.cost_approval_threshold = 1000.0

    # Monitoring knobs: poll interval and health-check timeout are in seconds
    self.monitoring_interval = 30
    self.health_check_timeout = 10
    self.max_retries = 3

    # Named AWS profiles used for multi-account operations
    self.aws_profiles = dict(
        single_account="ams-shared-services-non-prod-ReadOnlyAccess-499201730520",
        centralised_ops="ams-centralised-ops-ReadOnlyAccess-335083429030",
        billing="ams-admin-Billing-ReadOnlyAccess-909135376185",
    )

    # In-memory registries keyed by deployment id / approval request id
    self.active_deployments: Dict[str, DeploymentStatus] = {}
    self.approval_requests: Dict[str, ApprovalRequest] = {}

    # Where deployment artifacts are written
    artifacts_path = Path("artifacts/deployments")
    self.artifacts_dir = artifacts_path
    artifacts_path.mkdir(parents=True, exist_ok=True)

    logger.info(f"Production Deployment Framework initialized - Safety Mode: {self.default_dry_run}")
212
+
213
async def deploy_optimization_campaign(self, deployment_plan: DeploymentPlan) -> Dict[str, Any]:
    """
    Run a deployment campaign end to end.

    Phases, in order: plan validation, approval (when the plan requires it),
    dry-run (when the plan enables it), production deployment, health
    monitoring (when the plan enables it) and report generation.

    Args:
        deployment_plan: Comprehensive deployment configuration

    Returns:
        Dict whose "status" is one of:
        - "success": every phase completed; includes operation counts,
          the rollback flag and the deployment report.
        - "cancelled": the approval phase did not approve the plan.
        - "failed": validation, dry-run or production deployment reported a
          failure, or an exception was raised. A production-deployment
          failure still carries the deployment report.
    """
    deployment_id = deployment_plan.deployment_id
    monthly_cost_impact = sum(op.get("cost_impact", 0) for op in deployment_plan.operations)

    self.rich_console.print_panel(
        "🚀 Production Deployment Campaign",
        f"Deployment ID: {deployment_id}\n"
        f"Strategy: {deployment_plan.strategy.value}\n"
        f"Target Accounts: {len(deployment_plan.target_accounts)}\n"
        f"Operations: {len(deployment_plan.operations)}\n"
        f"Cost Impact: ${monthly_cost_impact:.0f}/month\n"
        f"Safety Mode: {'ENABLED' if deployment_plan.dry_run_first else 'DISABLED'}",
        title="🏗️ Enterprise Deployment",
    )

    # Created before the try block so the exception handler below can always
    # inspect it (previously it could be unbound when the handler ran).
    deployment_status = DeploymentStatus(
        deployment_id=deployment_id, current_phase="initialization", started_at=datetime.utcnow()
    )
    self.active_deployments[deployment_id] = deployment_status

    try:
        # Phase 1: Pre-deployment validation
        validation_result = await self._validate_deployment_plan(deployment_plan)
        if not validation_result["success"]:
            return {"status": "failed", "phase": "validation", "error": validation_result["error"]}

        deployment_status.current_phase = "validation_complete"
        deployment_status.progress_percentage = 10.0

        # Phase 2: Approval workflow (if required)
        if deployment_plan.approval_required:
            approval_result = await self._process_approval_workflow(deployment_plan)
            if not approval_result["approved"]:
                return {"status": "cancelled", "phase": "approval", "reason": approval_result["reason"]}

            deployment_status.current_phase = "approved"
            deployment_status.progress_percentage = 20.0

        # Phase 3: Dry-run execution (if enabled)
        if deployment_plan.dry_run_first:
            dry_run_result = await self._execute_dry_run(deployment_plan)
            if not dry_run_result["success"]:
                return {"status": "failed", "phase": "dry_run", "error": dry_run_result["error"]}

            deployment_status.current_phase = "dry_run_complete"
            deployment_status.progress_percentage = 40.0

        # Phase 4: Production deployment
        deployment_result = await self._execute_production_deployment(deployment_plan, deployment_status)

        # A rollout that explicitly reports success=False must not be
        # summarised as a successful campaign. A result without a "success"
        # key is treated as success, which preserves the previous outcome.
        if not (deployment_result or {}).get("success", True):
            report_result = await self._generate_deployment_report(deployment_plan, deployment_status)
            return {
                "status": "failed",
                "deployment_id": deployment_id,
                "phase": "production_deployment",
                "error": deployment_result.get("error", "Production deployment failed"),
                "total_operations": len(deployment_plan.operations),
                "successful_operations": deployment_status.successful_operations,
                "failed_operations": deployment_status.failed_operations,
                "rollback_triggered": deployment_status.rollback_triggered,
                "deployment_report": report_result,
            }

        # Phase 5: Post-deployment monitoring
        if deployment_plan.monitoring_enabled:
            await self._monitor_deployment_health(deployment_plan, deployment_status)

        # Generate comprehensive deployment report
        report_result = await self._generate_deployment_report(deployment_plan, deployment_status)

        return {
            "status": "success",
            "deployment_id": deployment_id,
            "phases_completed": deployment_status.current_phase,
            "total_operations": len(deployment_plan.operations),
            "successful_operations": deployment_status.successful_operations,
            "failed_operations": deployment_status.failed_operations,
            "rollback_triggered": deployment_status.rollback_triggered,
            "deployment_report": report_result,
        }

    except Exception as e:
        error_msg = f"Deployment campaign failed: {str(e)}"
        logger.error(error_msg)

        # Report a rollback only when one actually happened: either an
        # earlier phase already flagged it, or it is triggered right here.
        rollback_triggered = deployment_status.rollback_triggered
        if deployment_status.successful_operations > 0:
            await self._trigger_emergency_rollback(deployment_plan, deployment_status, str(e))
            rollback_triggered = True

        return {
            "status": "failed",
            "deployment_id": deployment_id,
            "error": error_msg,
            "rollback_triggered": rollback_triggered,
        }
302
+
303
async def _validate_deployment_plan(self, deployment_plan: DeploymentPlan) -> Dict[str, Any]:
    """
    Check a deployment plan before anything is executed.

    Checks run in this order: account access, region availability, cost
    versus the plan's threshold, per-operation parameters, security
    compliance and resource dependencies. Every failing check adds a message
    to the list of issues; any issue makes the validation fail.

    Args:
        deployment_plan: Deployment plan to validate

    Returns:
        ``{"success": True, "warnings": [...]}`` when no issue was found,
        otherwise ``{"success": False, "error": ..., "issues": [...]}``.
        If a check raises, ``{"success": False, "error": ...}`` is returned.
    """
    self.rich_console.print_info("🔍 Validating deployment plan...")

    problems = []
    warnings = []

    try:
        # Accounts the framework cannot access
        problems.extend(
            [
                f"Invalid or insufficient access to account {account}"
                for account in deployment_plan.target_accounts
                if not await self._validate_account_access(account)
            ]
        )

        # Regions that are unavailable or inaccessible
        problems.extend(
            [
                f"Region {target_region} not available or accessible"
                for target_region in deployment_plan.target_regions
                if not await self._validate_region_availability(target_region)
            ]
        )

        # Cost above the threshold is only acceptable with the approval gate on
        monthly_cost = sum(op.get("cost_impact", 0) for op in deployment_plan.operations)
        over_threshold = monthly_cost > deployment_plan.cost_threshold
        if over_threshold and not deployment_plan.approval_required:
            problems.append(f"Cost impact ${monthly_cost:.0f}/month requires approval")

        # Per-operation parameter checks (reported with 1-based positions)
        for position, operation in enumerate(deployment_plan.operations, start=1):
            if not self._validate_operation_parameters(operation):
                problems.append(f"Invalid parameters in operation {position}")

        # Security checks, then resource dependency checks
        problems.extend(await self._validate_security_compliance(deployment_plan))
        problems.extend(await self._validate_resource_dependencies(deployment_plan))

        if problems:
            self.rich_console.print_error(f"❌ Validation failed with {len(problems)} issues:")
            for problem in problems:
                self.rich_console.print_error(f" • {problem}")

            return {"success": False, "error": "Validation failed", "issues": problems}

        if warnings:
            self.rich_console.print_warning(f"⚠️ Validation completed with {len(warnings)} warnings:")
            for note in warnings:
                self.rich_console.print_warning(f" • {note}")

        self.rich_console.print_success("✅ Deployment plan validation successful")
        return {"success": True, "warnings": warnings}

    except Exception as exc:
        error_msg = f"Validation error: {str(exc)}"
        logger.error(error_msg)
        return {"success": False, "error": error_msg}
367
+
368
async def _process_approval_workflow(self, deployment_plan: DeploymentPlan) -> Dict[str, Any]:
    """
    Record an approval request for the plan and obtain a decision.

    In dry-run mode the approval is simulated. Otherwise the operator is
    prompted on stdin; only "yes", "y" or "approve" (case-insensitive,
    surrounding whitespace ignored) approves the deployment.

    Args:
        deployment_plan: Deployment plan requiring approval

    Returns:
        ``{"approved": True, "approval_id": ...}`` on approval (plus
        ``"simulated": True`` in dry-run mode), otherwise
        ``{"approved": False, "reason": ...}``.
    """
    total_cost_impact = sum(op.get("cost_impact", 0) for op in deployment_plan.operations)

    # Create approval request
    approval_request = ApprovalRequest(
        request_id=f"APPROVE-{deployment_plan.deployment_id}",
        operation_type="cost_optimization_deployment",
        resource_id=deployment_plan.deployment_id,
        cost_impact_monthly=total_cost_impact,
        cost_impact_annual=total_cost_impact * 12,
        business_justification="AWS networking cost optimization campaign with projected 25-50% savings",
        risk_assessment="Low risk - automated deployment with rollback capability",
        requestor="deploy-agent-terminal-5",
    )

    self.approval_requests[approval_request.request_id] = approval_request

    self.rich_console.print_panel(
        "🔐 Management Approval Required",
        f"Request ID: {approval_request.request_id}\n"
        f"Monthly Cost Impact: ${total_cost_impact:.0f}\n"
        f"Annual Cost Impact: ${total_cost_impact * 12:.0f}\n"
        f"Expires: {approval_request.expires_at.strftime('%Y-%m-%d %H:%M:%S')}\n"
        "Risk Level: LOW (automated with rollback)",
        title="🏢 Executive Approval Gate",
    )

    if self.dry_run:
        # Dry-run mode - simulate approval
        self.rich_console.print_info("[DRY-RUN] Simulating management approval")
        return {"approved": True, "approval_id": approval_request.request_id, "simulated": True}

    # input() blocks, so run it in the default executor: calling it directly
    # inside a coroutine would stall every other task on the event loop
    # while waiting for the operator.
    prompt = "\n🎯 Management Approval Required - Proceed with deployment? (yes/no): "
    loop = asyncio.get_running_loop()
    try:
        raw_response = await loop.run_in_executor(None, input, prompt)
    except EOFError:
        # stdin is closed (non-interactive run): nobody can approve, so the
        # only safe outcome is a rejection.
        logger.warning("No interactive input available for approval - treating as rejection")
        raw_response = ""

    approval_response = raw_response.lower().strip()

    if approval_response in ("yes", "y", "approve"):
        approval_request.status = ApprovalStatus.APPROVED
        approval_request.approver = "management-terminal-0"
        approval_request.approval_notes = "Approved for cost optimization deployment"

        self.rich_console.print_success("✅ Deployment approved - proceeding with execution")
        return {"approved": True, "approval_id": approval_request.request_id}

    approval_request.status = ApprovalStatus.REJECTED
    approval_request.approval_notes = "Deployment rejected by management"

    self.rich_console.print_warning("❌ Deployment rejected - operation cancelled")
    return {"approved": False, "reason": "Management rejected deployment"}
427
+
428
async def _execute_production_deployment(
    self, deployment_plan: DeploymentPlan, deployment_status: DeploymentStatus
) -> Dict[str, Any]:
    """
    Run the production rollout using the strategy chosen in the plan.

    Canary, blue/green and rolling strategies each go to their own handler;
    any other strategy goes to the all-at-once handler. If the handler raises,
    the error is logged, an emergency rollback is triggered when the plan
    enables rollback, and a failure result is returned.

    Args:
        deployment_plan: Deployment configuration
        deployment_status: Current deployment status (its phase is updated here)

    Returns:
        The selected handler's result, or ``{"success": False, "error": ...}``
        when the handler raised.
    """
    deployment_status.current_phase = "production_deployment"

    strategy_label = deployment_plan.strategy.value.replace("_", " ").title()
    monitoring_state = "ENABLED" if deployment_plan.monitoring_enabled else "DISABLED"
    rollback_state = "ENABLED" if deployment_plan.rollback_enabled else "DISABLED"

    self.rich_console.print_panel(
        f"🚀 Executing {strategy_label} Deployment",
        f"Operations: {len(deployment_plan.operations)}\n"
        f"Target Accounts: {len(deployment_plan.target_accounts)}\n"
        f"Monitoring: {monitoring_state}\n"
        f"Rollback: {rollback_state}",
        title="🏗️ Production Execution",
    )

    try:
        # Handlers are looked up by name so that only the selected one is
        # resolved on the instance.
        handler_names = {
            DeploymentStrategy.CANARY: "_execute_canary_deployment",
            DeploymentStrategy.BLUE_GREEN: "_execute_blue_green_deployment",
            DeploymentStrategy.ROLLING: "_execute_rolling_deployment",
        }
        # Anything not listed above (ALL_AT_ONCE) uses the all-at-once handler
        handler_name = handler_names.get(deployment_plan.strategy, "_execute_all_at_once_deployment")
        handler = getattr(self, handler_name)
        return await handler(deployment_plan, deployment_status)

    except Exception as exc:
        error_msg = f"Production deployment failed: {str(exc)}"
        logger.error(error_msg)

        if deployment_plan.rollback_enabled:
            await self._trigger_emergency_rollback(deployment_plan, deployment_status, error_msg)

        return {"success": False, "error": error_msg}
470
+
471
async def _execute_canary_deployment(
    self, deployment_plan: DeploymentPlan, deployment_status: DeploymentStatus
) -> Dict[str, Any]:
    """
    Roll out to a small canary group, watch it, then roll out to the rest.

    The canary group is the first tenth of the target accounts (integer
    division), with a slice size of at least one. A failed canary deployment
    aborts immediately. A canary that fails its health monitoring is rolled
    back. A failed rollout to the remaining accounts triggers a full rollback.

    Args:
        deployment_plan: Deployment configuration
        deployment_status: Current deployment status (progress is updated here)

    Returns:
        Dict with ``"success"``; on success also the strategy name, the two
        group sizes and the successful-operation count; on failure an
        ``"error"`` message plus details or metrics where available.
    """
    self.rich_console.print_info("🐤 Starting Canary Deployment Phase")

    # Phase 1: canary group = first ~10% of targets, slice size never below one
    canary_size = max(1, len(deployment_plan.target_accounts) // 10)
    canary_accounts = deployment_plan.target_accounts[:canary_size]

    canary_outcome = await self._deploy_to_account_group(canary_accounts, deployment_plan.operations, "canary")
    if not canary_outcome["success"]:
        return {"success": False, "error": "Canary deployment failed", "details": canary_outcome}

    deployment_status.progress_percentage = 30.0

    # Phase 2: watch the canary for the configured duration
    self.rich_console.print_info(f"⏱️ Monitoring canary for {deployment_plan.canary_duration}s...")
    canary_health = await self._monitor_canary_health(
        canary_accounts, deployment_plan.canary_duration, deployment_status
    )

    if not canary_health["healthy"]:
        # Unhealthy canary: undo it and stop
        await self._rollback_canary_deployment(canary_accounts, deployment_status)
        return {"success": False, "error": "Canary failed health checks", "metrics": canary_health["metrics"]}

    deployment_status.progress_percentage = 60.0

    # Phase 3: everything after the canary slice
    remaining_accounts = deployment_plan.target_accounts[len(canary_accounts) :]

    if remaining_accounts:
        rollout_outcome = await self._deploy_to_account_group(
            remaining_accounts, deployment_plan.operations, "production"
        )
        if not rollout_outcome["success"]:
            # Main rollout failed: roll back everything
            await self._trigger_full_rollback(deployment_plan, deployment_status)
            return {"success": False, "error": "Production rollout failed"}

    deployment_status.progress_percentage = 100.0
    deployment_status.current_phase = "deployment_complete"
    deployment_status.completed_at = datetime.utcnow()

    self.rich_console.print_success("🎉 Canary deployment completed successfully!")

    return {
        "success": True,
        "strategy": "canary",
        "canary_accounts": len(canary_accounts),
        "production_accounts": len(remaining_accounts),
        "total_operations": deployment_status.successful_operations,
    }
536
+
537
async def _monitor_deployment_health(
    self, deployment_plan: DeploymentPlan, deployment_status: DeploymentStatus
) -> Dict[str, Any]:
    """
    Poll deployment health for the plan's monitoring duration.

    Each pass fetches health results, compares them with the plan's
    thresholds and prints any breaches. An emergency rollback is triggered,
    and monitoring stops, when the error rate exceeds twice its threshold or
    availability drops below its threshold. Errors raised during a pass are
    logged and counted as an alert; monitoring then continues.

    Args:
        deployment_plan: Deployment configuration
        deployment_status: Current deployment status

    Returns:
        Dict with the planned duration, the last observed metrics, the
        number of alerts raised and whether a rollback was triggered.
    """
    self.rich_console.print_info("📊 Starting post-deployment health monitoring...")

    monitoring_start = datetime.utcnow()
    monitoring_end = monitoring_start + timedelta(seconds=deployment_plan.monitoring_duration)

    # The poll interval is configured on the framework; DeploymentPlan has no
    # such field. Reading it from the plan raised AttributeError on every
    # pass, which the handler below swallowed, so the loop never slept. A
    # plan-level override is still honoured if one is present.
    check_interval = getattr(deployment_plan, "monitoring_interval", self.monitoring_interval)

    metrics = {
        "error_rate": 0.0,
        "avg_response_time": 0.0,
        "availability": 1.0,
        "cost_savings": 0.0,
        "alerts_triggered": 0,
    }

    while datetime.utcnow() < monitoring_end:
        try:
            # Check deployment health across all accounts
            health_results = await self._check_deployment_health(
                deployment_plan.target_accounts, deployment_plan.target_regions
            )

            # Update metrics
            metrics["error_rate"] = health_results.get("error_rate", 0.0)
            metrics["avg_response_time"] = health_results.get("avg_response_time", 0.0)
            metrics["availability"] = health_results.get("availability", 1.0)

            # Check threshold breaches
            alerts_triggered = []

            if metrics["error_rate"] > deployment_plan.error_rate_threshold:
                alerts_triggered.append(f"Error rate {metrics['error_rate']:.2%} exceeds threshold")

            if metrics["avg_response_time"] > deployment_plan.latency_threshold:
                alerts_triggered.append(f"Latency {metrics['avg_response_time']:.2f}s exceeds threshold")

            if metrics["availability"] < deployment_plan.availability_threshold:
                alerts_triggered.append(f"Availability {metrics['availability']:.2%} below threshold")

            if alerts_triggered:
                self.rich_console.print_warning(f"⚠️ Health check alerts: {len(alerts_triggered)}")
                for alert in alerts_triggered:
                    self.rich_console.print_warning(f" • {alert}")

                metrics["alerts_triggered"] += len(alerts_triggered)

                # Trigger rollback if critical thresholds breached
                if (
                    metrics["error_rate"] > deployment_plan.error_rate_threshold * 2
                    or metrics["availability"] < deployment_plan.availability_threshold
                ):
                    self.rich_console.print_error("🚨 Critical thresholds breached - triggering rollback!")
                    await self._trigger_emergency_rollback(
                        deployment_plan, deployment_status, "Health monitoring threshold breach"
                    )
                    break

        except Exception as e:
            logger.error(f"Health monitoring error: {str(e)}")
            metrics["alerts_triggered"] += 1

        # Wait before the next check. This runs after failed passes too, so a
        # persistently failing health check cannot turn into a busy loop.
        await asyncio.sleep(check_interval)

    self.rich_console.print_success("✅ Health monitoring completed")

    return {
        "success": True,
        "duration_seconds": deployment_plan.monitoring_duration,
        "metrics": metrics,
        "alerts_triggered": metrics["alerts_triggered"],
        "rollback_triggered": deployment_status.rollback_triggered,
    }
621
+
622
async def _generate_deployment_report(
    self, deployment_plan: DeploymentPlan, deployment_status: DeploymentStatus
) -> Dict[str, Any]:
    """
    Generate comprehensive deployment report for executive review.

    The report is assembled from the plan and status objects, written as a
    JSON file under ``self.artifacts_dir`` and summarised in a console panel.

    Args:
        deployment_plan: Deployment configuration
        deployment_status: Final deployment status

    Returns:
        Dict containing deployment report data
    """
    self.rich_console.print_info("📝 Generating deployment report...")

    # Resolve the completion time once so the reported duration and the
    # "completed_at" field are derived from the same instant when the status
    # object has no completion time yet.
    completed_at = deployment_status.completed_at or datetime.utcnow()
    total_duration = (completed_at - deployment_status.started_at).total_seconds()

    # max(1, ...) keeps the ratio defined when no operation was recorded
    attempted_operations = deployment_status.successful_operations + deployment_status.failed_operations
    success_rate = deployment_status.successful_operations / max(1, attempted_operations)

    # Calculate cost impact
    total_cost_impact = sum(op.get("cost_impact", 0) for op in deployment_plan.operations)
    annual_cost_impact = total_cost_impact * 12
    estimated_annual_savings = annual_cost_impact * 0.3  # 30% savings estimate

    # Tier boundaries are inclusive so the status label always matches the
    # tier _generate_next_steps_recommendations picks for the same rate.
    if success_rate >= 0.95:
        overall_status = "SUCCESS"
    elif success_rate >= 0.8:
        overall_status = "PARTIAL_SUCCESS"
    else:
        overall_status = "FAILED"

    # Generate comprehensive report
    report = {
        "deployment_summary": {
            "deployment_id": deployment_plan.deployment_id,
            "strategy": deployment_plan.strategy.value,
            "started_at": deployment_status.started_at.isoformat(),
            "completed_at": completed_at.isoformat(),
            "total_duration_minutes": total_duration / 60,
            "success_rate": success_rate,
            "rollback_triggered": deployment_status.rollback_triggered,
        },
        "operations_summary": {
            "total_operations": len(deployment_plan.operations),
            "successful_operations": deployment_status.successful_operations,
            "failed_operations": deployment_status.failed_operations,
            "target_accounts": len(deployment_plan.target_accounts),
            "target_regions": len(deployment_plan.target_regions),
        },
        "cost_impact": {
            "monthly_cost_impact": total_cost_impact,
            "annual_cost_impact": annual_cost_impact,
            "estimated_annual_savings": estimated_annual_savings,
            # Guard against division by zero (and negative impact) in the ratio
            "roi_percentage": (estimated_annual_savings / annual_cost_impact) * 100
            if total_cost_impact > 0
            else 0,
        },
        "safety_metrics": {
            "dry_run_executed": deployment_plan.dry_run_first,
            "approval_required": deployment_plan.approval_required,
            "rollback_enabled": deployment_plan.rollback_enabled,
            "monitoring_enabled": deployment_plan.monitoring_enabled,
            "avg_execution_time": deployment_status.avg_execution_time,
            "error_rate": deployment_status.error_rate,
            "availability_score": deployment_status.availability_score,
        },
        "executive_summary": {
            "deployment_status": overall_status,
            "business_impact": f"${estimated_annual_savings:.0f} annual savings potential",
            "operational_impact": f"{deployment_status.successful_operations}/{len(deployment_plan.operations)} operations completed",
            "risk_assessment": "LOW" if not deployment_status.rollback_triggered else "MEDIUM",
            "next_steps": self._generate_next_steps_recommendations(deployment_status, success_rate),
        },
    }

    # Export report to artifacts
    report_timestamp = datetime.utcnow().strftime("%Y%m%d_%H%M%S")
    report_path = self.artifacts_dir / f"deployment_report_{deployment_plan.deployment_id}_{report_timestamp}.json"

    # default=str stringifies any value json cannot serialise natively
    with open(report_path, "w", encoding="utf-8") as f:
        json.dump(report, f, indent=2, default=str)

    # Display executive summary
    self.rich_console.print_panel(
        "📊 Deployment Report Summary",
        f"Status: {overall_status}\n"
        f"Success Rate: {success_rate:.1%}\n"
        f"Duration: {total_duration / 60:.1f} minutes\n"
        f"Business Impact: {report['executive_summary']['business_impact']}\n"
        f"Report Saved: {report_path}",
        title="🎯 Executive Summary",
    )

    self.rich_console.print_success(f"✅ Deployment report generated: {report_path}")

    return report
719
+
720
def _generate_next_steps_recommendations(
    self, deployment_status: DeploymentStatus, success_rate: float
) -> List[str]:
    """Build the follow-up action list for a finished deployment.

    The first three entries depend on the success-rate tier (>= 0.95,
    >= 0.8, or lower); three rollback-specific entries are appended when
    ``deployment_status.rollback_triggered`` is truthy.
    """
    # Tiers are checked from the highest floor down; the first match wins.
    tiers = (
        (
            0.95,
            (
                "Monitor cost savings over next 30 days",
                "Document successful deployment patterns",
                "Plan next optimization phase for additional accounts",
            ),
        ),
        (
            0.8,
            (
                "Review failed operations for root cause analysis",
                "Optimize deployment procedures based on lessons learned",
                "Consider retry of failed operations with improved parameters",
            ),
        ),
    )
    below_all_tiers = (
        "Conduct thorough post-mortem analysis",
        "Review and strengthen pre-deployment validation",
        "Consider rollback of successful operations if business impact negative",
    )

    matched = next((steps for floor, steps in tiers if success_rate >= floor), below_all_tiers)
    next_steps = list(matched)

    if deployment_status.rollback_triggered:
        next_steps += [
            "Analyze rollback root causes",
            "Improve monitoring thresholds and alerting",
            "Strengthen deployment health checks",
        ]

    return next_steps
762
+
763
+ # Utility methods for deployment execution
764
async def _deploy_to_account_group(
    self, accounts: List[str], operations: List[Dict[str, Any]], group_name: str
) -> Dict[str, Any]:
    """Deploy operations to a group of accounts with parallel execution.

    Args:
        accounts: Account identifiers to deploy to.
        operations: Operation dictionaries handed to every account deployment.
        group_name: Label used in the progress messages.

    Returns:
        Dict with ``success`` (True when more than 80% of the accounts
        succeeded), ``successful_accounts``, ``failed_accounts`` and
        ``success_rate``. An empty ``accounts`` list yields a rate of 0.
    """

    self.rich_console.print_info(f"🚀 Deploying to {group_name} group: {len(accounts)} accounts")

    # Parallel execution across accounts; failures come back as result
    # entries instead of aborting the whole group.
    results = await asyncio.gather(
        *(self._deploy_to_single_account(account_id, operations) for account_id in accounts),
        return_exceptions=True,
    )

    successful_accounts = 0
    failed_accounts = 0

    for account_id, result in zip(accounts, results):
        # BaseException rather than Exception: a cancelled sub-task is
        # reported as CancelledError, which is not an Exception subclass on
        # Python 3.8+ and has no .get() to fall through to.
        if isinstance(result, BaseException):
            self.rich_console.print_error(f"❌ Account {account_id} deployment failed: {str(result)}")
            failed_accounts += 1
        elif result.get("success", False):
            successful_accounts += 1
        else:
            failed_accounts += 1

    success_rate = successful_accounts / len(accounts) if accounts else 0

    self.rich_console.print_info(
        f"📊 {group_name.title()} deployment complete: "
        f"{successful_accounts}/{len(accounts)} accounts successful ({success_rate:.1%})"
    )

    return {
        "success": success_rate > 0.8,  # 80% success threshold
        "successful_accounts": successful_accounts,
        "failed_accounts": failed_accounts,
        "success_rate": success_rate,
    }
804
+
805
async def _deploy_to_single_account(self, account_id: str, operations: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Run the operations against one account, in order, stopping at the first failure.

    Returns a dict with ``success`` and ``account_id``. On a failed
    operation it also carries ``failed_operation`` and ``error``; when an
    exception is raised it carries ``error`` with the exception text.
    """
    try:
        for step in operations:
            outcome = await self._execute_single_operation(account_id, step)
            if outcome.get("success", False):
                continue

            # First unsuccessful operation ends the account deployment
            return {
                "success": False,
                "account_id": account_id,
                "failed_operation": step.get("type"),
                "error": outcome.get("error"),
            }
    except Exception as exc:
        return {"success": False, "account_id": account_id, "error": str(exc)}

    return {"success": True, "account_id": account_id}
825
+
826
async def _execute_single_operation(self, account_id: str, operation: Dict[str, Any]) -> Dict[str, Any]:
    """Dispatch one operation to its handler based on ``operation["type"]``.

    Unknown types produce a failure dict; an exception raised while
    resolving or running the handler is logged and also reported as a
    failure dict with the exception text under ``error``.
    """
    operation_type = operation.get("type")

    # (operation type, name of the handler method on self)
    handler_names = (
        ("optimize_nat_gateway", "_optimize_nat_gateway_operation"),
        ("cleanup_unused_eips", "_cleanup_eips_operation"),
        ("vpc_cost_analysis", "_vpc_cost_analysis_operation"),
    )

    try:
        for known_type, method_name in handler_names:
            if operation_type == known_type:
                handler = getattr(self, method_name)
                return await handler(account_id, operation)

        return {"success": False, "error": f"Unknown operation type: {operation_type}"}

    except Exception as e:
        logger.error(f"Operation {operation_type} failed for account {account_id}: {str(e)}")
        return {"success": False, "error": str(e)}
844
+
845
+ # Emergency rollback procedures
846
async def _trigger_emergency_rollback(
    self, deployment_plan: DeploymentPlan, deployment_status: DeploymentStatus, reason: str
):
    """Flag the deployment as rolled back, run the rollback and file an incident report.

    ``deployment_status`` is mutated in place: ``rollback_triggered``,
    ``rollback_reason`` and ``current_phase`` are updated before the
    rollback procedures start. The incident report is generated whether or
    not the rollback succeeded.
    """
    console = self.rich_console
    console.print_error(f"🚨 EMERGENCY ROLLBACK TRIGGERED: {reason}")

    # Record the rollback on the status object before doing any work
    deployment_status.rollback_triggered = True
    deployment_status.rollback_reason = reason
    deployment_status.current_phase = "emergency_rollback"

    logger.critical(f"Emergency rollback initiated for {deployment_plan.deployment_id}: {reason}")

    if await self._execute_rollback_procedures(deployment_plan):
        console.print_success("✅ Emergency rollback completed successfully")
    else:
        console.print_error("❌ Emergency rollback encountered issues - manual intervention required")

    await self._generate_incident_report(deployment_plan, deployment_status, reason)
870
+
871
async def _execute_rollback_procedures(self, deployment_plan: DeploymentPlan) -> bool:
    """Roll back every target account, walking the account list back to front.

    A failing account is logged and the loop carries on with the remaining
    accounts. Returns True only when every account rollback reported
    success; an exception aborts the loop and yields False.
    """
    self.rich_console.print_warning("🔄 Executing rollback procedures...")

    accounts_not_rolled_back = []

    try:
        for account_id in reversed(deployment_plan.target_accounts):
            if await self._rollback_account_operations(account_id):
                continue

            accounts_not_rolled_back.append(account_id)
            logger.error(f"Rollback failed for account {account_id}")

    except Exception as e:
        logger.error(f"Rollback execution failed: {str(e)}")
        return False

    return not accounts_not_rolled_back
891
+
892
+ # Validation helper methods
893
async def _validate_account_access(self, account_id: str) -> bool:
    """Report whether the target account is accessible.

    Placeholder: no real check is performed and ``account_id`` is not
    inspected, so the result is always True.
    """
    try:
        # Simulated result. In production, implement actual cross-account
        # role assumption validation here.
        access_confirmed = True
    except Exception:
        access_confirmed = False

    return access_confirmed
900
+
901
async def _validate_region_availability(self, region: str) -> bool:
    """Return True when ``region`` is one of the four regions accepted by this simulated check."""
    # Simulated allow-list; no real region lookup is performed
    accepted_regions = ("us-east-1", "us-west-2", "eu-west-1", "ap-southeast-1")

    try:
        is_accepted = region in accepted_regions
    except Exception:
        is_accepted = False

    return is_accepted
908
+
909
def _validate_operation_parameters(self, operation: Dict[str, Any]) -> bool:
    """Return True when the operation has the "type", "target" and "parameters" keys.

    Only key presence is checked; the values are not inspected.
    """
    for required_key in ("type", "target", "parameters"):
        if required_key not in operation:
            return False
    return True
913
+
914
async def _validate_security_compliance(self, deployment_plan: DeploymentPlan) -> List[str]:
    """List the security-compliance issues of a plan.

    Two plan flags are checked, ``dry_run_first`` and ``approval_required``;
    each one that is falsy contributes one message. An empty list means no
    issue was found.
    """
    # (flag value, message reported when the flag is not set)
    required_controls = (
        (deployment_plan.dry_run_first, "Dry-run validation is required for security compliance"),
        (deployment_plan.approval_required, "Approval workflow is required for production deployments"),
    )

    return [message for enabled, message in required_controls if not enabled]
926
+
927
async def _validate_resource_dependencies(self, deployment_plan: DeploymentPlan) -> List[str]:
    """List dependency conflicts among the plan's operations.

    The only conflict checked is a plan containing both a "delete_vpc" and
    a "create_nat_gateway" operation.
    """
    requested_types = [op.get("type") for op in deployment_plan.operations]

    deletes_vpc = "delete_vpc" in requested_types
    creates_nat_gateway = "create_nat_gateway" in requested_types

    if deletes_vpc and creates_nat_gateway:
        return ["Cannot create NAT Gateway in VPC scheduled for deletion"]

    return []
938
+
939
+
940
+ # Deployment plan factory for common scenarios
941
class DeploymentPlanFactory:
    """Factory for creating common deployment plans."""

    # Regions used whenever a caller does not name any
    DEFAULT_REGIONS = ("us-east-1", "us-west-2")

    @staticmethod
    def create_cost_optimization_campaign(
        target_accounts: List[str],
        target_regions: List[str] = None,
        strategy: DeploymentStrategy = DeploymentStrategy.CANARY,
    ) -> DeploymentPlan:
        """Create deployment plan for comprehensive cost optimization campaign.

        Args:
            target_accounts: Accounts the campaign is deployed to.
            target_regions: Regions to cover; ``None`` or an empty list
                falls back to ``DEFAULT_REGIONS``.
            strategy: Deployment strategy, canary by default.

        Returns:
            A plan with approval, dry-run, rollback and monitoring all
            enabled and a cost threshold of 100.0.
        """

        deployment_id = f"cost-opt-{datetime.utcnow().strftime('%Y%m%d-%H%M%S')}"

        # Default to common regions if not specified
        if not target_regions:
            target_regions = list(DeploymentPlanFactory.DEFAULT_REGIONS)

        # Define optimization operations
        # NOTE(review): "analyze_nat_costs" is not among the types dispatched by
        # _execute_single_operation in this file, which reports unknown types as
        # failures - confirm a handler exists on the path that runs this plan.
        operations = [
            {
                "type": "analyze_nat_costs",
                "target": "all_vpcs",
                "parameters": {},
                "cost_impact": 0,  # Analysis only
            },
            {
                "type": "optimize_nat_gateway",
                "target": "underutilized_nat_gateways",
                "parameters": {"consolidation_enabled": True},
                "cost_impact": 135,  # 3 NAT gateways × $45/month
            },
            {
                "type": "cleanup_unused_eips",
                "target": "all_regions",
                "parameters": {"release_unused": True},
                "cost_impact": 36,  # 10 EIPs × $3.60/month
            },
            {
                "type": "vpc_cost_analysis",
                "target": "all_vpcs",
                "parameters": {"generate_report": True},
                "cost_impact": 0,  # Reporting only
            },
        ]

        return DeploymentPlan(
            deployment_id=deployment_id,
            strategy=strategy,
            target_accounts=target_accounts,
            target_regions=target_regions,
            operations=operations,
            approval_required=True,
            dry_run_first=True,
            rollback_enabled=True,
            monitoring_enabled=True,
            cost_threshold=100.0,  # Lower threshold for cost optimization
        )

    @staticmethod
    def create_emergency_rollback_plan(
        original_deployment_id: str,
        target_accounts: List[str],
        target_regions: List[str] = None,
    ) -> DeploymentPlan:
        """Create deployment plan for emergency rollback operations.

        Args:
            original_deployment_id: Identifier of the deployment being
                rolled back; the new plan id is ``rollback-<id>``.
            target_accounts: Accounts to roll back.
            target_regions: Regions to roll back, so the rollback can cover
                the same regions as the original deployment; ``None`` or an
                empty list falls back to ``DEFAULT_REGIONS``.

        Returns:
            An all-at-once plan with approval, dry-run and rollback disabled,
            monitoring enabled and a cost threshold of 1000.0.
        """

        deployment_id = f"rollback-{original_deployment_id}"

        if not target_regions:
            target_regions = list(DeploymentPlanFactory.DEFAULT_REGIONS)

        # Rollback operations (reverse of optimizations)
        # NOTE(review): neither operation type below is dispatched by
        # _execute_single_operation in this file - confirm handlers exist.
        operations = [
            {
                "type": "restore_nat_gateways",
                "target": "consolidated_gateways",
                "parameters": {"restore_original_configuration": True},
                "cost_impact": -135,  # Negative cost impact (increased spend)
            },
            {
                "type": "restore_elastic_ips",
                "target": "released_eips",
                "parameters": {"recreate_released_eips": False},  # Cannot recreate same IPs
                "cost_impact": 0,
            },
        ]

        return DeploymentPlan(
            deployment_id=deployment_id,
            strategy=DeploymentStrategy.ALL_AT_ONCE,  # Emergency rollback
            target_accounts=target_accounts,
            target_regions=target_regions,
            operations=operations,
            approval_required=False,  # Emergency operations
            dry_run_first=False,  # Emergency deployment
            rollback_enabled=False,  # This IS the rollback
            monitoring_enabled=True,
            cost_threshold=1000.0,
        )