runbooks 0.7.9__py3-none-any.whl → 0.9.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- runbooks/__init__.py +1 -1
- runbooks/cfat/README.md +12 -1
- runbooks/cfat/__init__.py +1 -1
- runbooks/cfat/assessment/compliance.py +4 -1
- runbooks/cfat/assessment/runner.py +42 -34
- runbooks/cfat/models.py +1 -1
- runbooks/cloudops/__init__.py +123 -0
- runbooks/cloudops/base.py +385 -0
- runbooks/cloudops/cost_optimizer.py +811 -0
- runbooks/cloudops/infrastructure_optimizer.py +29 -0
- runbooks/cloudops/interfaces.py +828 -0
- runbooks/cloudops/lifecycle_manager.py +29 -0
- runbooks/cloudops/mcp_cost_validation.py +678 -0
- runbooks/cloudops/models.py +251 -0
- runbooks/cloudops/monitoring_automation.py +29 -0
- runbooks/cloudops/notebook_framework.py +676 -0
- runbooks/cloudops/security_enforcer.py +449 -0
- runbooks/common/__init__.py +152 -0
- runbooks/common/accuracy_validator.py +1039 -0
- runbooks/common/context_logger.py +440 -0
- runbooks/common/cross_module_integration.py +594 -0
- runbooks/common/enhanced_exception_handler.py +1108 -0
- runbooks/common/enterprise_audit_integration.py +634 -0
- runbooks/common/mcp_cost_explorer_integration.py +900 -0
- runbooks/common/mcp_integration.py +548 -0
- runbooks/common/performance_monitor.py +387 -0
- runbooks/common/profile_utils.py +216 -0
- runbooks/common/rich_utils.py +172 -1
- runbooks/feedback/user_feedback_collector.py +440 -0
- runbooks/finops/README.md +377 -458
- runbooks/finops/__init__.py +4 -21
- runbooks/finops/account_resolver.py +279 -0
- runbooks/finops/accuracy_cross_validator.py +638 -0
- runbooks/finops/aws_client.py +721 -36
- runbooks/finops/budget_integration.py +313 -0
- runbooks/finops/cli.py +59 -5
- runbooks/finops/cost_optimizer.py +1340 -0
- runbooks/finops/cost_processor.py +211 -37
- runbooks/finops/dashboard_router.py +900 -0
- runbooks/finops/dashboard_runner.py +990 -232
- runbooks/finops/embedded_mcp_validator.py +288 -0
- runbooks/finops/enhanced_dashboard_runner.py +8 -7
- runbooks/finops/enhanced_progress.py +327 -0
- runbooks/finops/enhanced_trend_visualization.py +423 -0
- runbooks/finops/finops_dashboard.py +184 -1829
- runbooks/finops/helpers.py +509 -196
- runbooks/finops/iam_guidance.py +400 -0
- runbooks/finops/markdown_exporter.py +466 -0
- runbooks/finops/multi_dashboard.py +1502 -0
- runbooks/finops/optimizer.py +15 -15
- runbooks/finops/profile_processor.py +2 -2
- runbooks/finops/runbooks.inventory.organizations_discovery.log +0 -0
- runbooks/finops/runbooks.security.report_generator.log +0 -0
- runbooks/finops/runbooks.security.run_script.log +0 -0
- runbooks/finops/runbooks.security.security_export.log +0 -0
- runbooks/finops/schemas.py +589 -0
- runbooks/finops/service_mapping.py +195 -0
- runbooks/finops/single_dashboard.py +710 -0
- runbooks/finops/tests/test_reference_images_validation.py +1 -1
- runbooks/inventory/README.md +12 -1
- runbooks/inventory/core/collector.py +157 -29
- runbooks/inventory/list_ec2_instances.py +9 -6
- runbooks/inventory/list_ssm_parameters.py +10 -10
- runbooks/inventory/organizations_discovery.py +210 -164
- runbooks/inventory/rich_inventory_display.py +74 -107
- runbooks/inventory/run_on_multi_accounts.py +13 -13
- runbooks/inventory/runbooks.inventory.organizations_discovery.log +0 -0
- runbooks/inventory/runbooks.security.security_export.log +0 -0
- runbooks/main.py +1371 -240
- runbooks/metrics/dora_metrics_engine.py +711 -17
- runbooks/monitoring/performance_monitor.py +433 -0
- runbooks/operate/README.md +394 -0
- runbooks/operate/base.py +215 -47
- runbooks/operate/ec2_operations.py +435 -5
- runbooks/operate/iam_operations.py +598 -3
- runbooks/operate/privatelink_operations.py +1 -1
- runbooks/operate/rds_operations.py +508 -0
- runbooks/operate/s3_operations.py +508 -0
- runbooks/operate/vpc_endpoints.py +1 -1
- runbooks/remediation/README.md +489 -13
- runbooks/remediation/base.py +5 -3
- runbooks/remediation/commons.py +8 -4
- runbooks/security/ENTERPRISE_SECURITY_FRAMEWORK.md +506 -0
- runbooks/security/README.md +12 -1
- runbooks/security/__init__.py +265 -33
- runbooks/security/cloudops_automation_security_validator.py +1164 -0
- runbooks/security/compliance_automation.py +12 -10
- runbooks/security/compliance_automation_engine.py +1021 -0
- runbooks/security/enterprise_security_framework.py +930 -0
- runbooks/security/enterprise_security_policies.json +293 -0
- runbooks/security/executive_security_dashboard.py +1247 -0
- runbooks/security/integration_test_enterprise_security.py +879 -0
- runbooks/security/module_security_integrator.py +641 -0
- runbooks/security/multi_account_security_controls.py +2254 -0
- runbooks/security/real_time_security_monitor.py +1196 -0
- runbooks/security/report_generator.py +1 -1
- runbooks/security/run_script.py +4 -8
- runbooks/security/security_baseline_tester.py +39 -52
- runbooks/security/security_export.py +99 -120
- runbooks/sre/README.md +472 -0
- runbooks/sre/__init__.py +33 -0
- runbooks/sre/mcp_reliability_engine.py +1049 -0
- runbooks/sre/performance_optimization_engine.py +1032 -0
- runbooks/sre/production_monitoring_framework.py +584 -0
- runbooks/sre/reliability_monitoring_framework.py +1011 -0
- runbooks/validation/__init__.py +2 -2
- runbooks/validation/benchmark.py +154 -149
- runbooks/validation/cli.py +159 -147
- runbooks/validation/mcp_validator.py +291 -248
- runbooks/vpc/README.md +478 -0
- runbooks/vpc/__init__.py +2 -2
- runbooks/vpc/manager_interface.py +366 -351
- runbooks/vpc/networking_wrapper.py +68 -36
- runbooks/vpc/rich_formatters.py +22 -8
- runbooks-0.9.1.dist-info/METADATA +308 -0
- {runbooks-0.7.9.dist-info → runbooks-0.9.1.dist-info}/RECORD +120 -59
- {runbooks-0.7.9.dist-info → runbooks-0.9.1.dist-info}/entry_points.txt +1 -1
- runbooks/finops/cross_validation.py +0 -375
- runbooks-0.7.9.dist-info/METADATA +0 -636
- {runbooks-0.7.9.dist-info → runbooks-0.9.1.dist-info}/WHEEL +0 -0
- {runbooks-0.7.9.dist-info → runbooks-0.9.1.dist-info}/licenses/LICENSE +0 -0
- {runbooks-0.7.9.dist-info → runbooks-0.9.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1340 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
Cost Optimization Module - Migrated from unSkript notebooks
|
4
|
+
===============================================
|
5
|
+
|
6
|
+
Objective: Stop idle EC2 instances and optimize AWS costs using CloudWatch metrics
|
7
|
+
Description: Find and stop EC2 instances with low CPU utilization to reduce costs
|
8
|
+
Step-by-Step:
|
9
|
+
1. Find Idle EC2 Instances (using CloudWatch CPU metrics)
|
10
|
+
2. Stop AWS Instances (with safety checks)
|
11
|
+
3. Report cost savings potential
|
12
|
+
|
13
|
+
Input: region, idle_cpu_threshold, idle_duration, instance_ids (optional)
|
14
|
+
Output: List of stopped instances and cost impact analysis
|
15
|
+
"""
|
16
|
+
|
17
|
+
# Standard library
import datetime
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Tuple, Any

# Third-party
import boto3
from botocore.exceptions import ClientError
from pydantic import BaseModel, Field

# Local
from ..common.rich_utils import (
    console, print_header, print_success, print_error, print_warning,
    create_table, create_progress_bar, format_cost
)
|
28
|
+
|
29
|
+
@dataclass
class IdleInstance:
    """An EC2 instance flagged as idle (low average CPU) by the optimizer."""

    instance_id: str                      # EC2 instance id (e.g. "i-0abc...")
    region: str                           # AWS region the instance lives in
    instance_type: str = ""               # e.g. "t3.micro"; drives cost estimation
    avg_cpu_utilization: float = 0.0      # average CPU % over the scan window
    estimated_monthly_cost: float = 0.0   # rough USD/month estimate
    # BUG FIX: the original used pydantic's Field() inside a stdlib @dataclass,
    # which makes the default a FieldInfo object instead of an empty dict.
    # dataclasses.field() is the correct mutable-default mechanism here.
    tags: Dict[str, str] = field(default_factory=dict)
|
38
|
+
|
39
|
+
@dataclass
class LowUsageVolume:
    """An EBS volume flagged as low-usage (little read activity) by the optimizer."""

    volume_id: str                        # EBS volume id (e.g. "vol-0abc...")
    region: str                           # AWS region the volume lives in
    volume_type: str = ""                 # e.g. "gp3"; drives cost estimation
    size_gb: int = 0                      # provisioned size in GiB
    avg_usage: float = 0.0                # average daily read volume, GB
    estimated_monthly_cost: float = 0.0   # rough USD/month estimate
    creation_date: Optional[str] = None   # ISO-8601 creation time, if known
    # BUG FIX: pydantic Field() inside a stdlib @dataclass leaves a FieldInfo
    # as the default value; dataclasses.field() yields the intended empty dict.
    tags: Dict[str, str] = field(default_factory=dict)
|
50
|
+
|
51
|
+
@dataclass
class UnusedNATGateway:
    """A NAT Gateway with no observed traffic, a candidate for deletion."""

    nat_gateway_id: str                     # NAT gateway id (e.g. "nat-0abc...")
    region: str                             # AWS region of the gateway
    vpc_id: str = ""                        # owning VPC
    state: str = ""                         # lifecycle state reported by EC2
    estimated_monthly_cost: float = 45.0    # ~$45/month per NAT Gateway
    creation_date: Optional[str] = None     # ISO-8601 creation time, if known
    # BUG FIX: pydantic Field() inside a stdlib @dataclass leaves a FieldInfo
    # as the default value; dataclasses.field() yields the intended empty dict.
    tags: Dict[str, str] = field(default_factory=dict)
|
61
|
+
|
62
|
+
@dataclass
class CostOptimizationResult:
    """Aggregated outcome of a cost-optimization run.

    Only the list relevant to the producing operation is populated
    (e.g. ``stop_idle_instances`` fills ``stopped_instances`` only).
    """

    # BUG FIX: the original used pydantic Field() inside a stdlib @dataclass,
    # so every default below was a FieldInfo object rather than an empty
    # list/dict; dataclasses.field() restores the intended defaults.
    stopped_instances: List[IdleInstance] = field(default_factory=list)
    deleted_volumes: List[LowUsageVolume] = field(default_factory=list)
    deleted_nat_gateways: List[UnusedNATGateway] = field(default_factory=list)
    total_potential_savings: float = 0.0        # USD/month across all entries
    execution_summary: Dict[str, Any] = field(default_factory=dict)
|
70
|
+
|
71
|
+
class AWSCostOptimizer:
|
72
|
+
"""
|
73
|
+
Enterprise AWS Cost Optimization
|
74
|
+
Migrated and enhanced from unSkript notebooks
|
75
|
+
Handles EC2 instances, EBS volumes, and other cost optimization scenarios
|
76
|
+
"""
|
77
|
+
|
78
|
+
def __init__(self, profile: Optional[str] = None):
    """Create an optimizer bound to an AWS session.

    Args:
        profile: Named AWS CLI profile; when omitted, the default
            credential chain is used.
    """
    self.profile = profile
    if profile:
        self.session = boto3.Session(profile_name=profile)
    else:
        self.session = boto3.Session()
|
81
|
+
|
82
|
+
def find_idle_instances(
    self,
    region: str = "",
    idle_cpu_threshold: int = 5,
    idle_duration: int = 6
) -> Tuple[bool, Optional[List[IdleInstance]]]:
    """
    Find idle EC2 instances based on CPU utilization.

    Migrated from: AWS_Stop_Idle_EC2_Instances.ipynb

    Args:
        region: AWS Region to scan (empty for all regions)
        idle_cpu_threshold: CPU threshold percentage (default 5%)
        idle_duration: Duration in hours to check (default 6h)

    Returns:
        Tuple (success, list_of_idle_instances). The boolean follows the
        unSkript convention: True means "nothing found", False means
        "findings present".
    """
    print_header("Cost Optimizer - Idle Instance Detection", "v0.9.1")

    targets = [region] if region else self._get_all_regions()
    findings: List[IdleInstance] = []

    with create_progress_bar() as progress:
        task_id = progress.add_task(
            f"Scanning {len(targets)} regions for idle instances...",
            total=len(targets)
        )

        for target in targets:
            try:
                findings.extend(
                    self._scan_region_for_idle_instances(
                        target, idle_cpu_threshold, idle_duration
                    )
                )
            except Exception as exc:
                # Best-effort scan: one failing region must not abort the run.
                print_warning(f"Failed to scan region {target}: {str(exc)}")
            finally:
                progress.advance(task_id)

    if not findings:
        print_success("No idle instances found")
        return (True, None)  # True = no results (unSkript convention)

    print_success(f"Found {len(findings)} idle instances across {len(targets)} regions")
    self._display_idle_instances_table(findings)
    return (False, findings)  # False = found results (unSkript convention)
|
132
|
+
|
133
|
+
def _scan_region_for_idle_instances(
    self,
    region: str,
    idle_cpu_threshold: int,
    idle_duration: int
) -> List[IdleInstance]:
    """Scan one region and return running instances whose CPU is below threshold."""
    findings: List[IdleInstance] = []

    try:
        ec2 = self.session.client('ec2', region_name=region)
        cloudwatch = self.session.client('cloudwatch', region_name=region)

        # Only running instances are candidates for stopping.
        described = ec2.describe_instances(
            Filters=[{'Name': 'instance-state-name', 'Values': ['running']}]
        )

        candidates = (
            inst
            for reservation in described['Reservations']
            for inst in reservation['Instances']
        )
        for inst in candidates:
            inst_id = inst['InstanceId']
            if not self._is_instance_idle(
                inst_id, idle_cpu_threshold, idle_duration, cloudwatch
            ):
                continue

            record = IdleInstance(
                instance_id=inst_id,
                region=region,
                instance_type=inst.get('InstanceType', 'unknown'),
                tags={t['Key']: t['Value'] for t in inst.get('Tags', [])}
            )
            # Rough estimate only; a production path would use the Pricing API.
            record.estimated_monthly_cost = self._estimate_instance_monthly_cost(
                inst.get('InstanceType', 't3.micro')
            )
            findings.append(record)

    except ClientError as e:
        print_warning(f"AWS API error in region {region}: {e}")
    except Exception as e:
        print_error(f"Unexpected error in region {region}: {e}")

    return findings
|
182
|
+
|
183
|
+
def _is_instance_idle(
|
184
|
+
self,
|
185
|
+
instance_id: str,
|
186
|
+
idle_cpu_threshold: int,
|
187
|
+
idle_duration: int,
|
188
|
+
cloudwatch_client
|
189
|
+
) -> bool:
|
190
|
+
"""Check if instance is idle based on CPU metrics"""
|
191
|
+
|
192
|
+
try:
|
193
|
+
now = datetime.datetime.utcnow()
|
194
|
+
start_time = now - datetime.timedelta(hours=idle_duration)
|
195
|
+
|
196
|
+
cpu_stats = cloudwatch_client.get_metric_statistics(
|
197
|
+
Namespace="AWS/EC2",
|
198
|
+
MetricName="CPUUtilization",
|
199
|
+
Dimensions=[{"Name": "InstanceId", "Value": instance_id}],
|
200
|
+
StartTime=start_time,
|
201
|
+
EndTime=now,
|
202
|
+
Period=3600, # 1 hour periods
|
203
|
+
Statistics=["Average"]
|
204
|
+
)
|
205
|
+
|
206
|
+
if not cpu_stats["Datapoints"]:
|
207
|
+
return False # No metrics = not idle (may be new instance)
|
208
|
+
|
209
|
+
# Calculate average CPU across all data points
|
210
|
+
avg_cpu = sum(
|
211
|
+
datapoint["Average"] for datapoint in cpu_stats["Datapoints"]
|
212
|
+
) / len(cpu_stats["Datapoints"])
|
213
|
+
|
214
|
+
return avg_cpu < idle_cpu_threshold
|
215
|
+
|
216
|
+
except Exception as e:
|
217
|
+
print_warning(f"Could not get metrics for {instance_id}: {e}")
|
218
|
+
return False
|
219
|
+
|
220
|
+
def stop_idle_instances(
    self,
    idle_instances: List[IdleInstance],
    dry_run: bool = True
) -> CostOptimizationResult:
    """
    Stop idle EC2 instances.

    Migrated from: AWS_Stop_Idle_EC2_Instances.ipynb

    Args:
        idle_instances: List of idle instances to stop
        dry_run: If True, only simulate the action

    Returns:
        CostOptimizationResult with stopped instances and savings
    """
    print_header(f"Cost Optimizer - Stop Idle Instances ({'DRY RUN' if dry_run else 'LIVE'})")

    stopped: List[IdleInstance] = []
    errors: List[str] = []
    monthly_savings = 0.0

    with create_progress_bar() as progress:
        task_id = progress.add_task(
            "Processing idle instances...",
            total=len(idle_instances)
        )

        for candidate in idle_instances:
            try:
                if dry_run:
                    # Simulation only: tally the would-be savings, touch nothing.
                    stopped.append(candidate)
                    monthly_savings += candidate.estimated_monthly_cost
                    console.print(f"[yellow]DRY RUN: Would stop {candidate.instance_id} "
                                  f"(${candidate.estimated_monthly_cost:.2f}/month savings)[/yellow]")
                else:
                    outcome = self._stop_single_instance(candidate)
                    if outcome['success']:
                        stopped.append(candidate)
                        monthly_savings += candidate.estimated_monthly_cost
                        print_success(f"Stopped {candidate.instance_id} - "
                                      f"${candidate.estimated_monthly_cost:.2f}/month saved")
                    else:
                        errors.append(f"{candidate.instance_id}: {outcome['error']}")
                        print_error(f"Failed to stop {candidate.instance_id}: {outcome['error']}")
            except Exception as exc:
                errors.append(f"{candidate.instance_id}: {str(exc)}")
                print_error(f"Error processing {candidate.instance_id}: {exc}")
            finally:
                progress.advance(task_id)

    summary = {
        'total_instances_processed': len(idle_instances),
        'successful_stops': len(stopped),
        'errors': errors,
        'dry_run': dry_run,
        'estimated_annual_savings': monthly_savings * 12
    }

    run_result = CostOptimizationResult(
        stopped_instances=stopped,
        total_potential_savings=monthly_savings,
        execution_summary=summary
    )

    self._display_optimization_summary(run_result)
    return run_result
|
293
|
+
|
294
|
+
def _stop_single_instance(self, instance: IdleInstance) -> Dict[str, Any]:
    """Issue a stop request for one instance and report the outcome.

    Returns a dict with 'success' plus either 'state_info' (on success)
    or 'error' (on failure); 'instance_id' is always present.
    """
    try:
        ec2 = self.session.client('ec2', region_name=instance.region)
        response = ec2.stop_instances(InstanceIds=[instance.instance_id])

        # Map instance-id -> transitional state reported by the API.
        states = {
            item['InstanceId']: item['CurrentState']
            for item in response['StoppingInstances']
        }

        return {
            'success': True,
            'state_info': states,
            'instance_id': instance.instance_id
        }

    except ClientError as e:
        return {
            'success': False,
            'error': f"AWS API Error: {e}",
            'instance_id': instance.instance_id
        }
    except Exception as e:
        return {
            'success': False,
            'error': f"Unexpected error: {e}",
            'instance_id': instance.instance_id
        }
|
325
|
+
|
326
|
+
def _get_all_regions(self) -> List[str]:
|
327
|
+
"""Get list of all AWS regions"""
|
328
|
+
try:
|
329
|
+
ec2_client = self.session.client('ec2', region_name='us-east-1')
|
330
|
+
response = ec2_client.describe_regions()
|
331
|
+
return [region['RegionName'] for region in response['Regions']]
|
332
|
+
except Exception:
|
333
|
+
# Fallback to common regions
|
334
|
+
return [
|
335
|
+
'us-east-1', 'us-west-2', 'eu-west-1', 'eu-central-1',
|
336
|
+
'ap-southeast-1', 'ap-northeast-1'
|
337
|
+
]
|
338
|
+
|
339
|
+
def _estimate_instance_monthly_cost(self, instance_type: str) -> float:
|
340
|
+
"""
|
341
|
+
Estimate monthly cost for instance type
|
342
|
+
Note: Real implementation should use AWS Pricing API
|
343
|
+
"""
|
344
|
+
# Simplified cost estimates (USD per month for common instance types)
|
345
|
+
cost_map = {
|
346
|
+
't3.micro': 8.76,
|
347
|
+
't3.small': 17.52,
|
348
|
+
't3.medium': 35.04,
|
349
|
+
't3.large': 70.08,
|
350
|
+
't3.xlarge': 140.16,
|
351
|
+
't3.2xlarge': 280.32,
|
352
|
+
'm5.large': 87.60,
|
353
|
+
'm5.xlarge': 175.20,
|
354
|
+
'm5.2xlarge': 350.40,
|
355
|
+
'c5.large': 78.84,
|
356
|
+
'c5.xlarge': 157.68,
|
357
|
+
'r5.large': 116.8,
|
358
|
+
'r5.xlarge': 233.6,
|
359
|
+
}
|
360
|
+
|
361
|
+
return cost_map.get(instance_type, 50.0) # Default estimate
|
362
|
+
|
363
|
+
def _display_idle_instances_table(self, idle_instances: List[IdleInstance]):
    """Render the idle-instance findings as a Rich table."""
    table = create_table(
        title="Idle EC2 Instances Found",
        columns=[
            {"header": "Instance ID", "style": "cyan"},
            {"header": "Region", "style": "blue"},
            {"header": "Type", "style": "green"},
            {"header": "Est. Monthly Cost", "style": "red"},
            {"header": "Tags", "style": "yellow"},
        ]
    )

    for entry in idle_instances:
        # Show at most two tags; summarize the remainder by count.
        shown = list(entry.tags.items())[:2]
        tag_display = ', '.join(f"{k}:{v}" for k, v in shown)
        hidden = len(entry.tags) - 2
        if hidden > 0:
            tag_display += f" (+{hidden} more)"

        table.add_row(
            entry.instance_id,
            entry.region,
            entry.instance_type,
            format_cost(entry.estimated_monthly_cost),
            tag_display or "No tags"
        )

    console.print(table)
|
392
|
+
|
393
|
+
def find_low_usage_volumes(
    self,
    region: str = "",
    threshold_days: int = 10
) -> Tuple[bool, Optional[List[LowUsageVolume]]]:
    """
    Find EBS volumes with low usage based on CloudWatch metrics.

    Migrated from: AWS_Delete_EBS_Volumes_With_Low_Usage.ipynb

    Args:
        region: AWS Region to scan (empty for all regions)
        threshold_days: Days to look back for usage metrics

    Returns:
        Tuple (success, list_of_low_usage_volumes). The boolean follows the
        unSkript convention: True means "nothing found", False means
        "findings present".
    """
    print_header("Cost Optimizer - Low Usage EBS Volume Detection", "v0.9.1")

    targets = [region] if region else self._get_all_regions()
    findings: List[LowUsageVolume] = []

    with create_progress_bar() as progress:
        task_id = progress.add_task(
            f"Scanning {len(targets)} regions for low usage volumes...",
            total=len(targets)
        )

        for target in targets:
            try:
                findings.extend(
                    self._scan_region_for_low_usage_volumes(target, threshold_days)
                )
            except Exception as exc:
                # Keep scanning the remaining regions on per-region failure.
                print_warning(f"Failed to scan region {target}: {str(exc)}")
            finally:
                progress.advance(task_id)

    if not findings:
        print_success("No low usage volumes found")
        return (True, None)  # True = no results (unSkript convention)

    print_success(f"Found {len(findings)} low usage volumes across {len(targets)} regions")
    self._display_low_usage_volumes_table(findings)
    return (False, findings)  # False = found results (unSkript convention)
|
441
|
+
|
442
|
+
def _scan_region_for_low_usage_volumes(
    self,
    region: str,
    threshold_days: int
) -> List[LowUsageVolume]:
    """Scan one region for EBS volumes with little read activity.

    A volume counts as "low usage" when its average daily read volume over
    the look-back window is under 1 GB, or when it has no read metrics at
    all. Note this looks at VolumeReadBytes only; write activity is not
    considered.

    Args:
        region: Region to scan.
        threshold_days: Look-back window in days for the metric query.

    Returns:
        Volumes flagged as low usage, with estimated monthly cost filled in.
    """
    findings: List[LowUsageVolume] = []

    try:
        ec2 = self.session.client('ec2', region_name=region)
        cloudwatch = self.session.client('cloudwatch', region_name=region)

        # Paginate so large fleets are handled without truncation.
        paginator = ec2.get_paginator('describe_volumes')

        # FIX: datetime.utcnow() is naive and deprecated (Python 3.12+);
        # use a timezone-aware UTC timestamp, which boto3 accepts directly.
        window_end = datetime.datetime.now(datetime.timezone.utc)
        window_start = window_end - datetime.timedelta(days=threshold_days)

        for page in paginator.paginate():
            for vol in page['Volumes']:
                vol_id = vol['VolumeId']

                try:
                    metrics = cloudwatch.get_metric_statistics(
                        Namespace='AWS/EBS',
                        MetricName='VolumeReadBytes',  # Changed from VolumeUsage to more standard metric
                        Dimensions=[{'Name': 'VolumeId', 'Value': vol_id}],
                        StartTime=window_start,
                        EndTime=window_end,
                        Period=86400,  # Daily periods
                        Statistics=['Sum']
                    )

                    datapoints = metrics['Datapoints']
                    total_bytes = sum(dp['Sum'] for dp in datapoints)
                    avg_daily_bytes = total_bytes / max(len(datapoints), 1)
                    avg_daily_gb = avg_daily_bytes / (1024 ** 3)  # Convert to GB

                    # Consider volume as low usage if < 1GB daily average read
                    if avg_daily_gb < 1.0 or not datapoints:
                        created = vol.get('CreateTime')
                        record = LowUsageVolume(
                            volume_id=vol_id,
                            region=region,
                            volume_type=vol.get('VolumeType', 'unknown'),
                            size_gb=vol.get('Size', 0),
                            avg_usage=avg_daily_gb,
                            creation_date=created.isoformat() if created else None,
                            tags={t['Key']: t['Value'] for t in vol.get('Tags', [])}
                        )

                        # Rough estimate; a production path would use the Pricing API.
                        record.estimated_monthly_cost = self._estimate_ebs_monthly_cost(
                            vol.get('VolumeType', 'gp3'),
                            vol.get('Size', 0)
                        )
                        findings.append(record)

                except ClientError as e:
                    # Skip volumes we can't get metrics for; stay quiet on throttling.
                    if 'Throttling' not in str(e):
                        print_warning(f"Could not get metrics for volume {vol_id}: {e}")
                    continue

    except ClientError as e:
        print_warning(f"AWS API error in region {region}: {e}")
    except Exception as e:
        print_error(f"Unexpected error in region {region}: {e}")

    return findings
|
523
|
+
|
524
|
+
def delete_low_usage_volumes(
    self,
    low_usage_volumes: List[LowUsageVolume],
    create_snapshots: bool = True,
    dry_run: bool = True
) -> CostOptimizationResult:
    """
    Delete low usage EBS volumes (optionally creating snapshots first).

    Migrated from: AWS_Delete_EBS_Volumes_With_Low_Usage.ipynb

    Args:
        low_usage_volumes: List of volumes to delete
        create_snapshots: Create snapshots before deletion
        dry_run: If True, only simulate the action

    Returns:
        CostOptimizationResult with deleted volumes and savings
    """
    print_header(f"Cost Optimizer - Delete Low Usage Volumes ({'DRY RUN' if dry_run else 'LIVE'})")

    removed: List[LowUsageVolume] = []
    errors: List[str] = []
    monthly_savings = 0.0

    with create_progress_bar() as progress:
        task_id = progress.add_task(
            "Processing low usage volumes...",
            total=len(low_usage_volumes)
        )

        for candidate in low_usage_volumes:
            try:
                if dry_run:
                    # Simulation only: tally savings without touching AWS.
                    removed.append(candidate)
                    monthly_savings += candidate.estimated_monthly_cost
                    console.print(f"[yellow]DRY RUN: Would delete {candidate.volume_id} "
                                  f"({candidate.size_gb}GB {candidate.volume_type}) - "
                                  f"${candidate.estimated_monthly_cost:.2f}/month savings[/yellow]")
                else:
                    outcome = self._delete_single_volume(candidate, create_snapshots)
                    if outcome['success']:
                        removed.append(candidate)
                        monthly_savings += candidate.estimated_monthly_cost
                        print_success(f"Deleted {candidate.volume_id} - "
                                      f"${candidate.estimated_monthly_cost:.2f}/month saved")
                    else:
                        errors.append(f"{candidate.volume_id}: {outcome['error']}")
                        print_error(f"Failed to delete {candidate.volume_id}: {outcome['error']}")
            except Exception as exc:
                errors.append(f"{candidate.volume_id}: {str(exc)}")
                print_error(f"Error processing {candidate.volume_id}: {exc}")
            finally:
                progress.advance(task_id)

    summary = {
        'total_volumes_processed': len(low_usage_volumes),
        'successful_deletions': len(removed),
        'errors': errors,
        'dry_run': dry_run,
        'snapshots_created': create_snapshots,
        'estimated_annual_savings': monthly_savings * 12
    }

    run_result = CostOptimizationResult(
        deleted_volumes=removed,
        total_potential_savings=monthly_savings,
        execution_summary=summary
    )

    self._display_volume_optimization_summary(run_result)
    return run_result
|
601
|
+
|
602
|
+
def _delete_single_volume(self, volume: LowUsageVolume, create_snapshot: bool = True) -> Dict[str, Any]:
    """Delete one EBS volume, optionally snapshotting it first.

    Returns a dict with 'success', 'volume_id', and either 'snapshot_id'
    (None when no snapshot was requested) or 'error'.
    """
    try:
        ec2 = self.session.client('ec2', region_name=volume.region)

        snapshot_id = None
        if create_snapshot:
            # Safety net: preserve the data before destroying the volume.
            snapshot = ec2.create_snapshot(
                VolumeId=volume.volume_id,
                Description=f"Automated backup before deleting low usage volume {volume.volume_id}"
            )
            snapshot_id = snapshot['SnapshotId']
            print_success(f"Created snapshot {snapshot_id} for volume {volume.volume_id}")

        ec2.delete_volume(VolumeId=volume.volume_id)

        return {
            'success': True,
            'snapshot_id': snapshot_id,
            'volume_id': volume.volume_id
        }

    except ClientError as e:
        return {
            'success': False,
            'error': f"AWS API Error: {e}",
            'volume_id': volume.volume_id
        }
    except Exception as e:
        return {
            'success': False,
            'error': f"Unexpected error: {e}",
            'volume_id': volume.volume_id
        }
|
639
|
+
|
640
|
+
def _estimate_ebs_monthly_cost(self, volume_type: str, size_gb: int) -> float:
|
641
|
+
"""
|
642
|
+
Estimate monthly cost for EBS volume
|
643
|
+
Note: Real implementation should use AWS Pricing API
|
644
|
+
"""
|
645
|
+
# Simplified cost estimates (USD per GB per month)
|
646
|
+
cost_per_gb = {
|
647
|
+
'gp3': 0.08,
|
648
|
+
'gp2': 0.10,
|
649
|
+
'io1': 0.125,
|
650
|
+
'io2': 0.125,
|
651
|
+
'st1': 0.045,
|
652
|
+
'sc1': 0.025,
|
653
|
+
'standard': 0.05
|
654
|
+
}
|
655
|
+
|
656
|
+
rate = cost_per_gb.get(volume_type, 0.08) # Default to gp3
|
657
|
+
return size_gb * rate
|
658
|
+
|
659
|
+
def _display_low_usage_volumes_table(self, low_usage_volumes: List[LowUsageVolume]):
    """Render the low-usage volume findings as a Rich table."""
    table = create_table(
        title="Low Usage EBS Volumes Found",
        columns=[
            {"header": "Volume ID", "style": "cyan"},
            {"header": "Region", "style": "blue"},
            {"header": "Type", "style": "green"},
            {"header": "Size (GB)", "style": "yellow"},
            {"header": "Est. Monthly Cost", "style": "red"},
            {"header": "Tags", "style": "magenta"},
        ]
    )

    for entry in low_usage_volumes:
        # Show at most two tags; summarize the remainder by count.
        shown = list(entry.tags.items())[:2]
        tag_display = ', '.join(f"{k}:{v}" for k, v in shown)
        hidden = len(entry.tags) - 2
        if hidden > 0:
            tag_display += f" (+{hidden} more)"

        table.add_row(
            entry.volume_id,
            entry.region,
            entry.volume_type,
            str(entry.size_gb),
            format_cost(entry.estimated_monthly_cost),
            tag_display or "No tags"
        )

    console.print(table)
|
690
|
+
|
691
|
+
def _display_volume_optimization_summary(self, result: CostOptimizationResult):
    """Display volume optimization summary"""

    # NOTE(review): summary keys (total_volumes_processed, successful_deletions,
    # errors, snapshots_created, dry_run, estimated_annual_savings) are presumably
    # set by the volume-deletion routine — confirm against its execution_summary.
    summary = result.execution_summary

    console.print()
    print_header("EBS Volume Optimization Summary")

    # Create summary table
    summary_table = create_table(
        title="Volume Optimization Results",
        columns=[
            {"header": "Metric", "style": "cyan"},
            {"header": "Value", "style": "green bold"}
        ]
    )

    summary_table.add_row("Volumes Processed", str(summary['total_volumes_processed']))
    summary_table.add_row("Successfully Deleted", str(summary['successful_deletions']))
    summary_table.add_row("Errors", str(len(summary['errors'])))
    summary_table.add_row("Snapshots Created", "Yes" if summary['snapshots_created'] else "No")
    summary_table.add_row("Monthly Savings", format_cost(result.total_potential_savings))
    summary_table.add_row("Annual Savings", format_cost(summary['estimated_annual_savings']))
    summary_table.add_row("Mode", "DRY RUN" if summary['dry_run'] else "LIVE EXECUTION")

    console.print(summary_table)

    # Errors are shown after the table so the summary stays readable.
    if summary['errors']:
        print_warning(f"Encountered {len(summary['errors'])} errors:")
        for error in summary['errors']:
            console.print(f" [red]• {error}[/red]")
|
722
|
+
|
723
|
+
def find_unused_nat_gateways(
    self,
    region: str = "",
    number_of_days: int = 7
) -> Tuple[bool, Optional[List[UnusedNATGateway]]]:
    """
    Find unused NAT Gateways based on CloudWatch connection metrics

    Migrated from: AWS_Delete_Unused_NAT_Gateways.ipynb

    Args:
        region: AWS Region to scan (empty for all regions)
        number_of_days: Days to look back for usage metrics

    Returns:
        Tuple (success, list_of_unused_nat_gateways)
    """
    print_header("Cost Optimizer - Unused NAT Gateway Detection", "v0.9.1")

    regions_to_check = [region] if region else self._get_all_regions()
    findings = []

    with create_progress_bar() as progress:
        task_id = progress.add_task(
            f"Scanning {len(regions_to_check)} regions for unused NAT Gateways...",
            total=len(regions_to_check)
        )

        for reg in regions_to_check:
            try:
                findings.extend(
                    self._scan_region_for_unused_nat_gateways(reg, number_of_days)
                )
            except Exception as e:
                # A single region failing should not abort the whole scan.
                print_warning(f"Failed to scan region {reg}: {str(e)}")
            finally:
                progress.advance(task_id)

    if not findings:
        print_success("No unused NAT Gateways found")
        return (True, None)  # True = no results (unSkript convention)

    print_success(f"Found {len(findings)} unused NAT Gateways across {len(regions_to_check)} regions")
    self._display_unused_nat_gateways_table(findings)
    return (False, findings)  # False = found results (unSkript convention)
|
771
|
+
|
772
|
+
def _scan_region_for_unused_nat_gateways(
    self,
    region: str,
    number_of_days: int
) -> List[UnusedNATGateway]:
    """
    Scan a single region and return its unused NAT Gateways.

    A gateway is reported when _is_nat_gateway_used() finds no connection
    activity over the look-back window. AWS API and unexpected errors are
    logged and swallowed so the multi-region caller can keep scanning.

    Args:
        region: AWS region name to scan
        number_of_days: CloudWatch look-back window in days

    Returns:
        List of UnusedNATGateway records (empty on error or none found).
    """
    result: List[UnusedNATGateway] = []

    try:
        ec2_client = self.session.client('ec2', region_name=region)
        cloudwatch_client = self.session.client('cloudwatch', region_name=region)

        # Timezone-aware window; datetime.utcnow() is deprecated since 3.12
        # and returned a naive timestamp.
        end_time = datetime.datetime.now(datetime.timezone.utc)
        start_time = end_time - datetime.timedelta(days=number_of_days)

        # Paginate: a single describe_nat_gateways call can truncate results
        # in accounts with many gateways.
        paginator = ec2_client.get_paginator('describe_nat_gateways')
        for page in paginator.paginate():
            for nat_gateway in page['NatGateways']:
                if nat_gateway['State'] == 'deleted':
                    continue  # Already gone; nothing to optimize

                nat_gateway_id = nat_gateway['NatGatewayId']

                # Skip gateways that show connection activity in the window.
                if self._is_nat_gateway_used(
                    cloudwatch_client, nat_gateway, start_time, end_time, number_of_days
                ):
                    continue

                # Extract tags into a plain dict for display/filtering.
                tags = {tag['Key']: tag['Value'] for tag in nat_gateway.get('Tags', [])}

                create_time = nat_gateway.get('CreateTime')  # read once, not twice
                result.append(UnusedNATGateway(
                    nat_gateway_id=nat_gateway_id,
                    region=region,
                    vpc_id=nat_gateway.get('VpcId', ''),
                    state=nat_gateway.get('State', ''),
                    creation_date=create_time.isoformat() if create_time else None,
                    tags=tags
                ))

    except ClientError as e:
        print_warning(f"AWS API error in region {region}: {e}")
    except Exception as e:
        print_error(f"Unexpected error in region {region}: {e}")

    return result
|
821
|
+
|
822
|
+
def _is_nat_gateway_used(
    self,
    cloudwatch_client,
    nat_gateway: Dict[str, Any],
    start_time: datetime.datetime,
    end_time: datetime.datetime,
    number_of_days: int
) -> bool:
    """Check if NAT Gateway is used based on connection metrics

    Returns True when the gateway shows any ActiveConnectionCount activity
    in [start_time, end_time], when it is not in 'available' state, or when
    metrics cannot be retrieved (fail-safe: assume used so it is not deleted).
    """

    try:
        if nat_gateway['State'] != 'available':
            return True  # Consider non-available gateways as "used"

        # Get ActiveConnectionCount metrics
        metrics_response = cloudwatch_client.get_metric_statistics(
            Namespace='AWS/NATGateway',
            MetricName='ActiveConnectionCount',
            Dimensions=[
                {
                    'Name': 'NatGatewayId',
                    'Value': nat_gateway['NatGatewayId']
                },
            ],
            StartTime=start_time,
            EndTime=end_time,
            Period=86400 * number_of_days,  # one period spanning the whole look-back window (NOT daily buckets)
            Statistics=['Sum']
        )

        datapoints = metrics_response.get('Datapoints', [])

        if not datapoints:
            return False  # No metrics = unused

        # Check if there are any active connections
        total_connections = sum(dp['Sum'] for dp in datapoints)
        return total_connections > 0

    except Exception as e:
        print_warning(f"Could not get metrics for NAT Gateway {nat_gateway['NatGatewayId']}: {e}")
        return True  # Assume used if we can't get metrics
|
864
|
+
|
865
|
+
def delete_unused_nat_gateways(
    self,
    unused_nat_gateways: List[UnusedNATGateway],
    dry_run: bool = True
) -> CostOptimizationResult:
    """
    Delete unused NAT Gateways

    Migrated from: AWS_Delete_Unused_NAT_Gateways.ipynb

    Args:
        unused_nat_gateways: List of NAT Gateways to delete
        dry_run: If True, only simulate the action

    Returns:
        CostOptimizationResult with deleted NAT Gateways and savings
    """
    print_header(f"Cost Optimizer - Delete Unused NAT Gateways ({'DRY RUN' if dry_run else 'LIVE'})")

    deleted_gateways = []  # gateways deleted (or would-be deleted in dry-run)
    total_savings = 0.0    # accumulated estimated monthly savings (USD)
    errors = []            # "<gateway-id>: <message>" strings for failures

    with create_progress_bar() as progress:
        task_id = progress.add_task(
            "Processing unused NAT Gateways...",
            total=len(unused_nat_gateways)
        )

        for gateway in unused_nat_gateways:
            try:
                if dry_run:
                    # Simulate deletion: count it and report the savings,
                    # but make no AWS call.
                    deleted_gateways.append(gateway)
                    total_savings += gateway.estimated_monthly_cost
                    console.print(f"[yellow]DRY RUN: Would delete {gateway.nat_gateway_id} "
                    f"in VPC {gateway.vpc_id} - "
                    f"${gateway.estimated_monthly_cost:.2f}/month savings[/yellow]")
                else:
                    # Actually delete the NAT Gateway
                    result = self._delete_single_nat_gateway(gateway)
                    if result['success']:
                        deleted_gateways.append(gateway)
                        total_savings += gateway.estimated_monthly_cost
                        print_success(f"Deleted {gateway.nat_gateway_id} - "
                        f"${gateway.estimated_monthly_cost:.2f}/month saved")
                    else:
                        errors.append(f"{gateway.nat_gateway_id}: {result['error']}")
                        print_error(f"Failed to delete {gateway.nat_gateway_id}: {result['error']}")

                progress.advance(task_id)

            except Exception as e:
                # Record the failure and keep processing the remaining gateways.
                errors.append(f"{gateway.nat_gateway_id}: {str(e)}")
                print_error(f"Error processing {gateway.nat_gateway_id}: {e}")
                progress.advance(task_id)

    # Create summary
    execution_summary = {
        'total_nat_gateways_processed': len(unused_nat_gateways),
        'successful_deletions': len(deleted_gateways),
        'errors': errors,
        'dry_run': dry_run,
        'estimated_annual_savings': total_savings * 12
    }

    result = CostOptimizationResult(
        deleted_nat_gateways=deleted_gateways,
        total_potential_savings=total_savings,
        execution_summary=execution_summary
    )

    self._display_nat_gateway_optimization_summary(result)
    return result
|
939
|
+
|
940
|
+
def _delete_single_nat_gateway(self, gateway: UnusedNATGateway) -> Dict[str, Any]:
    """Delete a single NAT Gateway, reporting success or failure as a dict."""

    outcome: Dict[str, Any] = {'nat_gateway_id': gateway.nat_gateway_id}
    try:
        ec2_client = self.session.client('ec2', region_name=gateway.region)
        outcome['response'] = ec2_client.delete_nat_gateway(
            NatGatewayId=gateway.nat_gateway_id
        )
        outcome['success'] = True
    except ClientError as e:
        # AWS-side rejection (permissions, state, throttling, ...)
        outcome['success'] = False
        outcome['error'] = f"AWS API Error: {e}"
    except Exception as e:
        outcome['success'] = False
        outcome['error'] = f"Unexpected error: {e}"
    return outcome
|
966
|
+
|
967
|
+
def _display_unused_nat_gateways_table(self, unused_gateways: List[UnusedNATGateway]):
    """Display unused NAT Gateways in a formatted table"""

    table = create_table(
        title="Unused NAT Gateways Found",
        columns=[
            {"header": "NAT Gateway ID", "style": "cyan"},
            {"header": "Region", "style": "blue"},
            {"header": "VPC ID", "style": "green"},
            {"header": "State", "style": "yellow"},
            {"header": "Est. Monthly Cost", "style": "red"},
            {"header": "Tags", "style": "magenta"},
        ]
    )

    for gateway in unused_gateways:
        # Format tags for display: show at most two, then a "(+N more)" suffix
        tag_display = ', '.join([f"{k}:{v}" for k, v in list(gateway.tags.items())[:2]])
        if len(gateway.tags) > 2:
            tag_display += f" (+{len(gateway.tags)-2} more)"

        table.add_row(
            gateway.nat_gateway_id,
            gateway.region,
            gateway.vpc_id,
            gateway.state,
            format_cost(gateway.estimated_monthly_cost),
            tag_display or "No tags"
        )

    console.print(table)
|
998
|
+
|
999
|
+
def _display_nat_gateway_optimization_summary(self, result: CostOptimizationResult):
    """Display NAT Gateway optimization summary"""

    summary = result.execution_summary

    console.print()
    print_header("NAT Gateway Optimization Summary")

    # Two-column metric/value table for the run's outcome.
    summary_table = create_table(
        title="NAT Gateway Optimization Results",
        columns=[
            {"header": "Metric", "style": "cyan"},
            {"header": "Value", "style": "green bold"}
        ]
    )

    rows = (
        ("NAT Gateways Processed", str(summary['total_nat_gateways_processed'])),
        ("Successfully Deleted", str(summary['successful_deletions'])),
        ("Errors", str(len(summary['errors']))),
        ("Monthly Savings", format_cost(result.total_potential_savings)),
        ("Annual Savings", format_cost(summary['estimated_annual_savings'])),
        ("Mode", "DRY RUN" if summary['dry_run'] else "LIVE EXECUTION"),
    )
    for metric, value in rows:
        summary_table.add_row(metric, value)

    console.print(summary_table)

    if summary['errors']:
        print_warning(f"Encountered {len(summary['errors'])} errors:")
        for error in summary['errors']:
            console.print(f" [red]• {error}[/red]")
|
1029
|
+
|
1030
|
+
def _display_optimization_summary(self, result: CostOptimizationResult):
    """Display cost optimization summary"""

    # NOTE(review): keys (total_instances_processed, successful_stops, errors,
    # dry_run, estimated_annual_savings) are presumably set by the instance-stop
    # routine — confirm against its execution_summary.
    summary = result.execution_summary

    console.print()
    print_header("Cost Optimization Summary")

    # Create summary table
    summary_table = create_table(
        title="Optimization Results",
        columns=[
            {"header": "Metric", "style": "cyan"},
            {"header": "Value", "style": "green bold"}
        ]
    )

    summary_table.add_row("Instances Processed", str(summary['total_instances_processed']))
    summary_table.add_row("Successfully Stopped", str(summary['successful_stops']))
    summary_table.add_row("Errors", str(len(summary['errors'])))
    summary_table.add_row("Monthly Savings", format_cost(result.total_potential_savings))
    summary_table.add_row("Annual Savings", format_cost(summary['estimated_annual_savings']))
    summary_table.add_row("Mode", "DRY RUN" if summary['dry_run'] else "LIVE EXECUTION")

    console.print(summary_table)

    # Errors are listed after the table so the summary stays readable.
    if summary['errors']:
        print_warning(f"Encountered {len(summary['errors'])} errors:")
        for error in summary['errors']:
            console.print(f" [red]• {error}[/red]")
|
1060
|
+
|
1061
|
+
|
1062
|
+
# CLI Interface Functions (compatible with existing runbooks architecture)
|
1063
|
+
def find_and_stop_idle_instances(
    profile: Optional[str] = None,
    region: str = "",
    idle_cpu_threshold: int = 5,
    idle_duration: int = 6,
    instance_ids: Optional[List[str]] = None,
    dry_run: bool = True
) -> Dict[str, Any]:
    """
    Main function for cost optimization - find and stop idle EC2 instances

    This function replicates the complete unSkript notebook workflow

    Args:
        profile: AWS profile forwarded to AWSCostOptimizer
        region: AWS region to scan (empty for all regions)
        idle_cpu_threshold: Threshold forwarded to find_idle_instances
        idle_duration: Duration forwarded to find_idle_instances
        instance_ids: Explicit instance IDs; when given, idle detection is skipped
        dry_run: If True, stopping is only simulated

    Returns:
        Dict with counts, potential savings, and the execution summary.
    """

    optimizer = AWSCostOptimizer(profile=profile)

    # Step 1: Find idle instances (or use provided instance IDs)
    if instance_ids:
        print_warning("Using provided instance IDs - skipping idle detection")
        # Create IdleInstance objects from provided IDs
        idle_instances = []
        for instance_id in instance_ids:
            idle_instance = IdleInstance(
                instance_id=instance_id,
                region=region,
                estimated_monthly_cost=50.0  # Default estimate
            )
            idle_instances.append(idle_instance)
        # success=False mimics "results found" so processing continues below.
        success = False
        found_instances = idle_instances
    else:
        success, found_instances = optimizer.find_idle_instances(
            region=region,
            idle_cpu_threshold=idle_cpu_threshold,
            idle_duration=idle_duration
        )

    # unSkript convention: success=True means nothing was found.
    if success or not found_instances:  # No idle instances found
        print_success("No idle instances to process")
        return {
            'idle_instances_found': 0,
            'instances_stopped': 0,
            'potential_savings': 0.0,
            'status': 'completed'
        }

    # Step 2: Stop idle instances
    optimization_result = optimizer.stop_idle_instances(
        idle_instances=found_instances,
        dry_run=dry_run
    )

    return {
        'idle_instances_found': len(found_instances),
        'instances_stopped': len(optimization_result.stopped_instances),
        'potential_monthly_savings': optimization_result.total_potential_savings,
        'potential_annual_savings': optimization_result.execution_summary['estimated_annual_savings'],
        'dry_run': dry_run,
        'status': 'completed',
        'details': optimization_result.execution_summary
    }
|
1124
|
+
|
1125
|
+
|
1126
|
+
# Additional CLI Functions for EBS Volume Optimization
|
1127
|
+
def find_and_delete_low_usage_volumes(
    profile: Optional[str] = None,
    region: str = "",
    threshold_days: int = 10,
    volume_ids: Optional[List[str]] = None,
    create_snapshots: bool = True,
    dry_run: bool = True
) -> Dict[str, Any]:
    """
    Main function for EBS cost optimization - find and delete low usage volumes

    Migrated from: AWS_Delete_EBS_Volumes_With_Low_Usage.ipynb
    """

    optimizer = AWSCostOptimizer(profile=profile)

    # Step 1: resolve target volumes — explicit IDs bypass usage detection.
    if volume_ids:
        print_warning("Using provided volume IDs - skipping usage detection")
        found_volumes = [
            LowUsageVolume(
                volume_id=vol_id,
                region=region,
                estimated_monthly_cost=5.0  # Default estimate
            )
            for vol_id in volume_ids
        ]
        # success=False mimics "results found" so processing continues below.
        success = False
    else:
        success, found_volumes = optimizer.find_low_usage_volumes(
            region=region,
            threshold_days=threshold_days
        )

    if success or not found_volumes:  # No low usage volumes found
        print_success("No low usage volumes to process")
        return {
            'low_usage_volumes_found': 0,
            'volumes_deleted': 0,
            'potential_savings': 0.0,
            'status': 'completed'
        }

    # Step 2: Delete low usage volumes
    optimization_result = optimizer.delete_low_usage_volumes(
        low_usage_volumes=found_volumes,
        create_snapshots=create_snapshots,
        dry_run=dry_run
    )

    return {
        'low_usage_volumes_found': len(found_volumes),
        'volumes_deleted': len(optimization_result.deleted_volumes),
        'potential_monthly_savings': optimization_result.total_potential_savings,
        'potential_annual_savings': optimization_result.execution_summary['estimated_annual_savings'],
        'snapshots_created': create_snapshots,
        'dry_run': dry_run,
        'status': 'completed',
        'details': optimization_result.execution_summary
    }
|
1189
|
+
|
1190
|
+
|
1191
|
+
def _build_comprehensive_summary_table(results: Dict[str, Any], total_monthly_savings: float):
    """Build the Rich summary table for comprehensive_cost_optimization()."""
    summary_table = create_table(
        title="Total Cost Optimization Impact",
        columns=[
            {"header": "Resource Type", "style": "cyan"},
            {"header": "Items Found", "style": "yellow"},
            {"header": "Items Processed", "style": "green"},
            {"header": "Monthly Savings", "style": "red bold"},
        ]
    )

    # EC2 Summary (missing keys default to zero, e.g. when the phase errored)
    ec2 = results.get('ec2_optimization', {})
    ec2_found = ec2.get('idle_instances_found', 0)
    ec2_stopped = ec2.get('instances_stopped', 0)
    ec2_savings = ec2.get('potential_monthly_savings', 0.0)
    summary_table.add_row(
        "EC2 Instances",
        str(ec2_found),
        str(ec2_stopped),
        format_cost(ec2_savings)
    )

    # EBS Summary
    ebs = results.get('ebs_optimization', {})
    ebs_found = ebs.get('low_usage_volumes_found', 0)
    ebs_deleted = ebs.get('volumes_deleted', 0)
    ebs_savings = ebs.get('potential_monthly_savings', 0.0)
    summary_table.add_row(
        "EBS Volumes",
        str(ebs_found),
        str(ebs_deleted),
        format_cost(ebs_savings)
    )

    # Total row
    summary_table.add_row(
        "[bold]TOTAL[/bold]",
        "[bold]" + str(ec2_found + ebs_found) + "[/bold]",
        "[bold]" + str(ec2_stopped + ebs_deleted) + "[/bold]",
        "[bold]" + format_cost(total_monthly_savings) + "[/bold]"
    )
    return summary_table


def comprehensive_cost_optimization(
    profile: Optional[str] = None,
    region: str = "",
    idle_cpu_threshold: int = 5,
    idle_duration: int = 6,
    volume_threshold_days: int = 10,
    dry_run: bool = True
) -> Dict[str, Any]:
    """
    Comprehensive cost optimization combining EC2 and EBS optimizations

    This combines multiple unSkript notebooks:
    - AWS_Stop_Idle_EC2_Instances.ipynb
    - AWS_Delete_EBS_Volumes_With_Low_Usage.ipynb

    Args:
        profile: AWS profile forwarded to the per-phase optimizers
        region: AWS region to scan (empty for all regions)
        idle_cpu_threshold: Forwarded to EC2 idle detection
        idle_duration: Forwarded to EC2 idle detection
        volume_threshold_days: Forwarded to EBS usage detection
        dry_run: If True, no actual changes are made

    Returns:
        Dict with per-phase results plus combined monthly/annual savings.
    """

    print_header("Comprehensive AWS Cost Optimization", "v0.9.1")

    total_monthly_savings = 0.0
    total_annual_savings = 0.0
    results: Dict[str, Any] = {}

    # Phase 1: EC2 Instance Optimization. A failure is recorded in the
    # results dict rather than raised, so Phase 2 still runs.
    try:
        print_header("Phase 1: EC2 Instance Optimization")
        ec2_result = find_and_stop_idle_instances(
            profile=profile,
            region=region,
            idle_cpu_threshold=idle_cpu_threshold,
            idle_duration=idle_duration,
            dry_run=dry_run
        )
        results['ec2_optimization'] = ec2_result
        total_monthly_savings += ec2_result.get('potential_monthly_savings', 0.0)
        total_annual_savings += ec2_result.get('potential_annual_savings', 0.0)

    except Exception as e:
        print_error(f"EC2 optimization failed: {e}")
        results['ec2_optimization'] = {'error': str(e)}

    # Phase 2: EBS Volume Optimization (independent of Phase 1 outcome).
    try:
        print_header("Phase 2: EBS Volume Optimization")
        ebs_result = find_and_delete_low_usage_volumes(
            profile=profile,
            region=region,
            threshold_days=volume_threshold_days,
            create_snapshots=True,
            dry_run=dry_run
        )
        results['ebs_optimization'] = ebs_result
        total_monthly_savings += ebs_result.get('potential_monthly_savings', 0.0)
        total_annual_savings += ebs_result.get('potential_annual_savings', 0.0)

    except Exception as e:
        print_error(f"EBS optimization failed: {e}")
        results['ebs_optimization'] = {'error': str(e)}

    # Summary
    print_header("Comprehensive Cost Optimization Summary")
    console.print(_build_comprehensive_summary_table(results, total_monthly_savings))

    print_success(f"Total Annual Savings Potential: {format_cost(total_annual_savings)}")

    if dry_run:
        print_warning("This was a DRY RUN. No actual changes were made.")

    return {
        'total_monthly_savings': total_monthly_savings,
        'total_annual_savings': total_annual_savings,
        'ec2_optimization': results.get('ec2_optimization', {}),
        'ebs_optimization': results.get('ebs_optimization', {}),
        'dry_run': dry_run,
        'status': 'completed'
    }
|
1309
|
+
|
1310
|
+
|
1311
|
+
if __name__ == "__main__":
    # Direct execution for testing
    # NOTE: these calls reach real AWS APIs through the default
    # credentials/profile; all phases run with dry_run=True so no
    # resources are actually modified.
    print("Testing Cost Optimization Module...")

    # Test 1: EC2 Instance Optimization
    print("\n=== Testing EC2 Optimization ===")
    ec2_result = find_and_stop_idle_instances(
        region="us-east-1",
        idle_cpu_threshold=10,
        idle_duration=24,
        dry_run=True
    )
    print(f"EC2 Result: {ec2_result}")

    # Test 2: EBS Volume Optimization
    print("\n=== Testing EBS Optimization ===")
    ebs_result = find_and_delete_low_usage_volumes(
        region="us-east-1",
        threshold_days=30,
        dry_run=True
    )
    print(f"EBS Result: {ebs_result}")

    # Test 3: Comprehensive Optimization
    print("\n=== Testing Comprehensive Optimization ===")
    comprehensive_result = comprehensive_cost_optimization(
        region="us-east-1",
        dry_run=True
    )
    print(f"Comprehensive Result: {comprehensive_result}")
|