runbooks 0.7.7-py3-none-any.whl → 0.7.9-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- runbooks/__init__.py +1 -1
- runbooks/base.py +2 -2
- runbooks/cfat/__init__.py +8 -4
- runbooks/cfat/assessment/collectors.py +171 -14
- runbooks/cfat/assessment/compliance.py +546 -522
- runbooks/cfat/assessment/runner.py +122 -11
- runbooks/cfat/models.py +6 -2
- runbooks/common/logger.py +14 -0
- runbooks/common/rich_utils.py +451 -0
- runbooks/enterprise/__init__.py +68 -0
- runbooks/enterprise/error_handling.py +411 -0
- runbooks/enterprise/logging.py +439 -0
- runbooks/enterprise/multi_tenant.py +583 -0
- runbooks/finops/README.md +468 -241
- runbooks/finops/__init__.py +39 -3
- runbooks/finops/cli.py +31 -28
- runbooks/finops/cross_validation.py +375 -0
- runbooks/finops/dashboard_runner.py +384 -207
- runbooks/finops/enhanced_dashboard_runner.py +525 -0
- runbooks/finops/finops_dashboard.py +1892 -0
- runbooks/finops/helpers.py +176 -173
- runbooks/finops/optimizer.py +384 -383
- runbooks/finops/tests/__init__.py +19 -0
- runbooks/finops/tests/results_test_finops_dashboard.xml +1 -0
- runbooks/finops/tests/run_comprehensive_tests.py +421 -0
- runbooks/finops/tests/run_tests.py +305 -0
- runbooks/finops/tests/test_finops_dashboard.py +705 -0
- runbooks/finops/tests/test_integration.py +477 -0
- runbooks/finops/tests/test_performance.py +380 -0
- runbooks/finops/tests/test_performance_benchmarks.py +500 -0
- runbooks/finops/tests/test_reference_images_validation.py +867 -0
- runbooks/finops/tests/test_single_account_features.py +715 -0
- runbooks/finops/tests/validate_test_suite.py +220 -0
- runbooks/finops/types.py +1 -1
- runbooks/hitl/enhanced_workflow_engine.py +725 -0
- runbooks/inventory/artifacts/scale-optimize-status.txt +12 -0
- runbooks/inventory/collectors/aws_comprehensive.py +192 -185
- runbooks/inventory/collectors/enterprise_scale.py +281 -0
- runbooks/inventory/core/collector.py +172 -13
- runbooks/inventory/list_ec2_instances.py +18 -20
- runbooks/inventory/list_ssm_parameters.py +31 -3
- runbooks/inventory/organizations_discovery.py +1269 -0
- runbooks/inventory/rich_inventory_display.py +393 -0
- runbooks/inventory/run_on_multi_accounts.py +35 -19
- runbooks/inventory/runbooks.security.report_generator.log +0 -0
- runbooks/inventory/runbooks.security.run_script.log +0 -0
- runbooks/inventory/vpc_flow_analyzer.py +1030 -0
- runbooks/main.py +2124 -174
- runbooks/metrics/dora_metrics_engine.py +599 -0
- runbooks/operate/__init__.py +2 -2
- runbooks/operate/base.py +122 -10
- runbooks/operate/deployment_framework.py +1032 -0
- runbooks/operate/deployment_validator.py +853 -0
- runbooks/operate/dynamodb_operations.py +10 -6
- runbooks/operate/ec2_operations.py +319 -11
- runbooks/operate/executive_dashboard.py +779 -0
- runbooks/operate/mcp_integration.py +750 -0
- runbooks/operate/nat_gateway_operations.py +1120 -0
- runbooks/operate/networking_cost_heatmap.py +685 -0
- runbooks/operate/privatelink_operations.py +940 -0
- runbooks/operate/s3_operations.py +10 -6
- runbooks/operate/vpc_endpoints.py +644 -0
- runbooks/operate/vpc_operations.py +1038 -0
- runbooks/remediation/__init__.py +2 -2
- runbooks/remediation/acm_remediation.py +1 -1
- runbooks/remediation/base.py +1 -1
- runbooks/remediation/cloudtrail_remediation.py +1 -1
- runbooks/remediation/cognito_remediation.py +1 -1
- runbooks/remediation/dynamodb_remediation.py +1 -1
- runbooks/remediation/ec2_remediation.py +1 -1
- runbooks/remediation/ec2_unattached_ebs_volumes.py +1 -1
- runbooks/remediation/kms_enable_key_rotation.py +1 -1
- runbooks/remediation/kms_remediation.py +1 -1
- runbooks/remediation/lambda_remediation.py +1 -1
- runbooks/remediation/multi_account.py +1 -1
- runbooks/remediation/rds_remediation.py +1 -1
- runbooks/remediation/s3_block_public_access.py +1 -1
- runbooks/remediation/s3_enable_access_logging.py +1 -1
- runbooks/remediation/s3_encryption.py +1 -1
- runbooks/remediation/s3_remediation.py +1 -1
- runbooks/remediation/vpc_remediation.py +475 -0
- runbooks/security/__init__.py +3 -1
- runbooks/security/compliance_automation.py +632 -0
- runbooks/security/report_generator.py +10 -0
- runbooks/security/run_script.py +31 -5
- runbooks/security/security_baseline_tester.py +169 -30
- runbooks/security/security_export.py +477 -0
- runbooks/validation/__init__.py +10 -0
- runbooks/validation/benchmark.py +484 -0
- runbooks/validation/cli.py +356 -0
- runbooks/validation/mcp_validator.py +768 -0
- runbooks/vpc/__init__.py +38 -0
- runbooks/vpc/config.py +212 -0
- runbooks/vpc/cost_engine.py +347 -0
- runbooks/vpc/heatmap_engine.py +605 -0
- runbooks/vpc/manager_interface.py +634 -0
- runbooks/vpc/networking_wrapper.py +1260 -0
- runbooks/vpc/rich_formatters.py +679 -0
- runbooks/vpc/tests/__init__.py +5 -0
- runbooks/vpc/tests/conftest.py +356 -0
- runbooks/vpc/tests/test_cli_integration.py +530 -0
- runbooks/vpc/tests/test_config.py +458 -0
- runbooks/vpc/tests/test_cost_engine.py +479 -0
- runbooks/vpc/tests/test_networking_wrapper.py +512 -0
- {runbooks-0.7.7.dist-info → runbooks-0.7.9.dist-info}/METADATA +40 -12
- {runbooks-0.7.7.dist-info → runbooks-0.7.9.dist-info}/RECORD +110 -52
- {runbooks-0.7.7.dist-info → runbooks-0.7.9.dist-info}/WHEEL +0 -0
- {runbooks-0.7.7.dist-info → runbooks-0.7.9.dist-info}/entry_points.txt +0 -0
- {runbooks-0.7.7.dist-info → runbooks-0.7.9.dist-info}/licenses/LICENSE +0 -0
- {runbooks-0.7.7.dist-info → runbooks-0.7.9.dist-info}/top_level.txt +0 -0
runbooks/finops/optimizer.py
CHANGED
@@ -4,16 +4,18 @@ Sprint 1-3: Achieve 40% cost reduction ($1.4M annually)
 """
 
 import json
-import boto3
-from datetime import datetime, timedelta
-from typing import Dict, List, Any, Optional
-from dataclasses import dataclass
 from concurrent.futures import ThreadPoolExecutor, as_completed
+from dataclasses import dataclass
+from datetime import datetime, timedelta
+from typing import Any, Dict, List, Optional
+
+import boto3
 
 
 @dataclass
 class CostSavingsOpportunity:
     """Data class for cost savings opportunity."""
+
     resource_type: str
     resource_id: str
     account_id: str
@@ -30,15 +32,15 @@ class CostOptimizer:
     Advanced cost optimization engine for enterprise AWS organizations.
     Identifies 25-50% cost savings opportunities across all services.
     """
-
-    def __init__(self, profile: str = None, target_savings_percent: float = 40.0, max_accounts: int =
+
+    def __init__(self, profile: str = None, target_savings_percent: float = 40.0, max_accounts: int = None):
         """
         Initialize cost optimizer for enterprise-scale analysis.
-
+
         Args:
             profile: AWS profile for authentication
             target_savings_percent: Target savings percentage (default: 40%)
-            max_accounts: Maximum accounts to analyze (
+            max_accounts: Maximum accounts to analyze (None = analyze all discovered accounts)
         """
         self.profile = profile
         self.target_savings_percent = target_savings_percent
@@ -47,351 +49,348 @@ class CostOptimizer:
         self.opportunities = []
         self.analysis_results = {}
         self.enhanced_services = [
+            "ec2",
+            "s3",
+            "rds",
+            "lambda",
+            "dynamodb",
+            "cloudwatch",
+            "vpc",
+            "elb",
+            "ebs",
+            "eip",
+            "nat_gateway",
+            "cloudtrail",
         ]
 
     def identify_all_waste(self, accounts: List[str] = None) -> Dict[str, List[CostSavingsOpportunity]]:
         …
         waste_patterns = {
+            "idle_resources": self.find_idle_resources(accounts),
+            "oversized_instances": self.analyze_rightsizing_opportunities(accounts),
+            "unattached_storage": self.find_orphaned_ebs_volumes(accounts),
+            "old_snapshots": self.find_old_snapshots(accounts),
+            "unused_elastic_ips": self.find_unused_elastic_ips(accounts),
+            "underutilized_rds": self.find_underutilized_rds(accounts),
+            "lambda_over_provisioned": self.find_lambda_waste(accounts),
+            "unused_load_balancers": self.find_unused_load_balancers(accounts),
+            "storage_class_optimization": self.analyze_s3_storage_class(accounts),
+            "cloudwatch_logs_retention": self.analyze_log_retention(accounts),
             # Enhanced analysis for higher savings
+            "nat_gateway_optimization": self.find_nat_gateway_waste(accounts),
+            "cloudtrail_optimization": self.find_cloudtrail_waste(accounts),
+            "cloudwatch_metrics_waste": self.find_cloudwatch_metrics_waste(accounts),
+            "unused_security_groups": self.find_unused_security_groups(accounts),
+            "reserved_instance_opportunities": self.analyze_reserved_instance_opportunities(accounts),
         }

The remainder of this hunk reworks the bodies of identify_all_waste, find_idle_resources, _analyze_idle_ec2, analyze_rightsizing_opportunities, find_orphaned_ebs_volumes, find_old_snapshots, calculate_total_savings, generate_savings_report, _get_top_opportunities, _get_quick_wins, and the start of _generate_recommendations in the same way: string literals switch from single to double quotes, blank lines lose trailing whitespace, and wrapped calls and dict/list literals are re-flowed in Black style. No change to the methods' logic is visible in the hunk.
@@ -401,422 +400,424 @@ class CostOptimizer:
         …
     def _estimate_ec2_monthly_cost(self, instance_type: str) -> float:
         """Estimate monthly EC2 cost."""
         hourly_costs = {
+            "t2.micro": 0.0116,
+            "t2.small": 0.023,
+            "t2.medium": 0.046,
+            "t3.micro": 0.0104,
+            "t3.small": 0.021,
+            "t3.medium": 0.042,
+            "m5.large": 0.096,
+            "m5.xlarge": 0.192,
+            "m5.2xlarge": 0.384,
+            "m5.4xlarge": 0.768,
+            "m5.8xlarge": 1.536,
         }
         hourly = hourly_costs.get(instance_type, 0.1)
         return hourly * 24 * 30

The rest of this hunk applies the same quote, whitespace, and line-wrapping normalization to the tail of _generate_recommendations, _save_report, the helper methods (_get_all_accounts, _get_account_session, _calculate_ebs_cost, _get_cpu_utilization, _get_running_instances, _analyze_instance_utilization, _suggest_smaller_instance), the placeholder finders (find_unused_elastic_ips, find_underutilized_rds, find_lambda_waste, find_unused_load_balancers, analyze_s3_storage_class), and the remaining analyzers (analyze_log_retention, find_nat_gateway_waste, find_cloudtrail_waste, find_cloudwatch_metrics_waste, find_unused_security_groups, analyze_reserved_instance_opportunities). No change to the methods' logic is visible in the hunk.