runbooks-1.1.4-py3-none-any.whl → runbooks-1.1.5-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- runbooks/__init__.py +31 -2
- runbooks/__init___optimized.py +18 -4
- runbooks/_platform/__init__.py +1 -5
- runbooks/_platform/core/runbooks_wrapper.py +141 -138
- runbooks/aws2/accuracy_validator.py +812 -0
- runbooks/base.py +7 -0
- runbooks/cfat/assessment/compliance.py +1 -1
- runbooks/cfat/assessment/runner.py +1 -0
- runbooks/cfat/cloud_foundations_assessment.py +227 -239
- runbooks/cli/__init__.py +1 -1
- runbooks/cli/commands/cfat.py +64 -23
- runbooks/cli/commands/finops.py +1005 -54
- runbooks/cli/commands/inventory.py +138 -35
- runbooks/cli/commands/operate.py +9 -36
- runbooks/cli/commands/security.py +42 -18
- runbooks/cli/commands/validation.py +432 -18
- runbooks/cli/commands/vpc.py +81 -17
- runbooks/cli/registry.py +22 -10
- runbooks/cloudops/__init__.py +20 -27
- runbooks/cloudops/base.py +96 -107
- runbooks/cloudops/cost_optimizer.py +544 -542
- runbooks/cloudops/infrastructure_optimizer.py +5 -4
- runbooks/cloudops/interfaces.py +224 -225
- runbooks/cloudops/lifecycle_manager.py +5 -4
- runbooks/cloudops/mcp_cost_validation.py +252 -235
- runbooks/cloudops/models.py +78 -53
- runbooks/cloudops/monitoring_automation.py +5 -4
- runbooks/cloudops/notebook_framework.py +177 -213
- runbooks/cloudops/security_enforcer.py +125 -159
- runbooks/common/accuracy_validator.py +11 -0
- runbooks/common/aws_pricing.py +349 -326
- runbooks/common/aws_pricing_api.py +211 -212
- runbooks/common/aws_profile_manager.py +40 -36
- runbooks/common/aws_utils.py +74 -79
- runbooks/common/business_logic.py +126 -104
- runbooks/common/cli_decorators.py +36 -60
- runbooks/common/comprehensive_cost_explorer_integration.py +455 -463
- runbooks/common/cross_account_manager.py +197 -204
- runbooks/common/date_utils.py +27 -39
- runbooks/common/decorators.py +29 -19
- runbooks/common/dry_run_examples.py +173 -208
- runbooks/common/dry_run_framework.py +157 -155
- runbooks/common/enhanced_exception_handler.py +15 -4
- runbooks/common/enhanced_logging_example.py +50 -64
- runbooks/common/enhanced_logging_integration_example.py +65 -37
- runbooks/common/env_utils.py +16 -16
- runbooks/common/error_handling.py +40 -38
- runbooks/common/lazy_loader.py +41 -23
- runbooks/common/logging_integration_helper.py +79 -86
- runbooks/common/mcp_cost_explorer_integration.py +476 -493
- runbooks/common/mcp_integration.py +63 -74
- runbooks/common/memory_optimization.py +140 -118
- runbooks/common/module_cli_base.py +37 -58
- runbooks/common/organizations_client.py +175 -193
- runbooks/common/patterns.py +23 -25
- runbooks/common/performance_monitoring.py +67 -71
- runbooks/common/performance_optimization_engine.py +283 -274
- runbooks/common/profile_utils.py +111 -37
- runbooks/common/rich_utils.py +201 -141
- runbooks/common/sre_performance_suite.py +177 -186
- runbooks/enterprise/__init__.py +1 -1
- runbooks/enterprise/logging.py +144 -106
- runbooks/enterprise/security.py +187 -204
- runbooks/enterprise/validation.py +43 -56
- runbooks/finops/__init__.py +26 -30
- runbooks/finops/account_resolver.py +1 -1
- runbooks/finops/advanced_optimization_engine.py +980 -0
- runbooks/finops/automation_core.py +268 -231
- runbooks/finops/business_case_config.py +184 -179
- runbooks/finops/cli.py +660 -139
- runbooks/finops/commvault_ec2_analysis.py +157 -164
- runbooks/finops/compute_cost_optimizer.py +336 -320
- runbooks/finops/config.py +20 -20
- runbooks/finops/cost_optimizer.py +484 -618
- runbooks/finops/cost_processor.py +332 -214
- runbooks/finops/dashboard_runner.py +1006 -172
- runbooks/finops/ebs_cost_optimizer.py +991 -657
- runbooks/finops/elastic_ip_optimizer.py +317 -257
- runbooks/finops/enhanced_mcp_integration.py +340 -0
- runbooks/finops/enhanced_progress.py +32 -29
- runbooks/finops/enhanced_trend_visualization.py +3 -2
- runbooks/finops/enterprise_wrappers.py +223 -285
- runbooks/finops/executive_export.py +203 -160
- runbooks/finops/helpers.py +130 -288
- runbooks/finops/iam_guidance.py +1 -1
- runbooks/finops/infrastructure/__init__.py +80 -0
- runbooks/finops/infrastructure/commands.py +506 -0
- runbooks/finops/infrastructure/load_balancer_optimizer.py +866 -0
- runbooks/finops/infrastructure/vpc_endpoint_optimizer.py +832 -0
- runbooks/finops/markdown_exporter.py +337 -174
- runbooks/finops/mcp_validator.py +1952 -0
- runbooks/finops/nat_gateway_optimizer.py +1512 -481
- runbooks/finops/network_cost_optimizer.py +657 -587
- runbooks/finops/notebook_utils.py +226 -188
- runbooks/finops/optimization_engine.py +1136 -0
- runbooks/finops/optimizer.py +19 -23
- runbooks/finops/rds_snapshot_optimizer.py +367 -411
- runbooks/finops/reservation_optimizer.py +427 -363
- runbooks/finops/scenario_cli_integration.py +64 -65
- runbooks/finops/scenarios.py +1277 -438
- runbooks/finops/schemas.py +218 -182
- runbooks/finops/snapshot_manager.py +2289 -0
- runbooks/finops/types.py +3 -3
- runbooks/finops/validation_framework.py +259 -265
- runbooks/finops/vpc_cleanup_exporter.py +189 -144
- runbooks/finops/vpc_cleanup_optimizer.py +591 -573
- runbooks/finops/workspaces_analyzer.py +171 -182
- runbooks/integration/__init__.py +89 -0
- runbooks/integration/mcp_integration.py +1920 -0
- runbooks/inventory/CLAUDE.md +816 -0
- runbooks/inventory/__init__.py +2 -2
- runbooks/inventory/cloud_foundations_integration.py +144 -149
- runbooks/inventory/collectors/aws_comprehensive.py +1 -1
- runbooks/inventory/collectors/aws_networking.py +109 -99
- runbooks/inventory/collectors/base.py +4 -0
- runbooks/inventory/core/collector.py +495 -313
- runbooks/inventory/drift_detection_cli.py +69 -96
- runbooks/inventory/inventory_mcp_cli.py +48 -46
- runbooks/inventory/list_rds_snapshots_aggregator.py +192 -208
- runbooks/inventory/mcp_inventory_validator.py +549 -465
- runbooks/inventory/mcp_vpc_validator.py +359 -442
- runbooks/inventory/organizations_discovery.py +55 -51
- runbooks/inventory/rich_inventory_display.py +33 -32
- runbooks/inventory/unified_validation_engine.py +278 -251
- runbooks/inventory/vpc_analyzer.py +732 -695
- runbooks/inventory/vpc_architecture_validator.py +293 -348
- runbooks/inventory/vpc_dependency_analyzer.py +382 -378
- runbooks/inventory/vpc_flow_analyzer.py +1 -1
- runbooks/main.py +49 -34
- runbooks/main_final.py +91 -60
- runbooks/main_minimal.py +22 -10
- runbooks/main_optimized.py +131 -100
- runbooks/main_ultra_minimal.py +7 -2
- runbooks/mcp/__init__.py +36 -0
- runbooks/mcp/integration.py +679 -0
- runbooks/monitoring/performance_monitor.py +9 -4
- runbooks/operate/dynamodb_operations.py +3 -1
- runbooks/operate/ec2_operations.py +145 -137
- runbooks/operate/iam_operations.py +146 -152
- runbooks/operate/networking_cost_heatmap.py +29 -8
- runbooks/operate/rds_operations.py +223 -254
- runbooks/operate/s3_operations.py +107 -118
- runbooks/operate/vpc_operations.py +646 -616
- runbooks/remediation/base.py +1 -1
- runbooks/remediation/commons.py +10 -7
- runbooks/remediation/commvault_ec2_analysis.py +70 -66
- runbooks/remediation/ec2_unattached_ebs_volumes.py +1 -0
- runbooks/remediation/multi_account.py +24 -21
- runbooks/remediation/rds_snapshot_list.py +86 -60
- runbooks/remediation/remediation_cli.py +92 -146
- runbooks/remediation/universal_account_discovery.py +83 -79
- runbooks/remediation/workspaces_list.py +46 -41
- runbooks/security/__init__.py +19 -0
- runbooks/security/assessment_runner.py +1150 -0
- runbooks/security/baseline_checker.py +812 -0
- runbooks/security/cloudops_automation_security_validator.py +509 -535
- runbooks/security/compliance_automation_engine.py +17 -17
- runbooks/security/config/__init__.py +2 -2
- runbooks/security/config/compliance_config.py +50 -50
- runbooks/security/config_template_generator.py +63 -76
- runbooks/security/enterprise_security_framework.py +1 -1
- runbooks/security/executive_security_dashboard.py +519 -508
- runbooks/security/multi_account_security_controls.py +959 -1210
- runbooks/security/real_time_security_monitor.py +422 -444
- runbooks/security/security_baseline_tester.py +1 -1
- runbooks/security/security_cli.py +143 -112
- runbooks/security/test_2way_validation.py +439 -0
- runbooks/security/two_way_validation_framework.py +852 -0
- runbooks/sre/production_monitoring_framework.py +167 -177
- runbooks/tdd/__init__.py +15 -0
- runbooks/tdd/cli.py +1071 -0
- runbooks/utils/__init__.py +14 -17
- runbooks/utils/logger.py +7 -2
- runbooks/utils/version_validator.py +50 -47
- runbooks/validation/__init__.py +6 -6
- runbooks/validation/cli.py +9 -3
- runbooks/validation/comprehensive_2way_validator.py +745 -704
- runbooks/validation/mcp_validator.py +906 -228
- runbooks/validation/terraform_citations_validator.py +104 -115
- runbooks/validation/terraform_drift_detector.py +447 -451
- runbooks/vpc/README.md +617 -0
- runbooks/vpc/__init__.py +8 -1
- runbooks/vpc/analyzer.py +577 -0
- runbooks/vpc/cleanup_wrapper.py +476 -413
- runbooks/vpc/cli_cloudtrail_commands.py +339 -0
- runbooks/vpc/cli_mcp_validation_commands.py +480 -0
- runbooks/vpc/cloudtrail_audit_integration.py +717 -0
- runbooks/vpc/config.py +92 -97
- runbooks/vpc/cost_engine.py +411 -148
- runbooks/vpc/cost_explorer_integration.py +553 -0
- runbooks/vpc/cross_account_session.py +101 -106
- runbooks/vpc/enhanced_mcp_validation.py +917 -0
- runbooks/vpc/eni_gate_validator.py +961 -0
- runbooks/vpc/heatmap_engine.py +185 -160
- runbooks/vpc/mcp_no_eni_validator.py +680 -639
- runbooks/vpc/nat_gateway_optimizer.py +358 -0
- runbooks/vpc/networking_wrapper.py +15 -8
- runbooks/vpc/pdca_remediation_planner.py +528 -0
- runbooks/vpc/performance_optimized_analyzer.py +219 -231
- runbooks/vpc/runbooks_adapter.py +1167 -241
- runbooks/vpc/tdd_red_phase_stubs.py +601 -0
- runbooks/vpc/test_data_loader.py +358 -0
- runbooks/vpc/tests/conftest.py +314 -4
- runbooks/vpc/tests/test_cleanup_framework.py +1022 -0
- runbooks/vpc/tests/test_cost_engine.py +0 -2
- runbooks/vpc/topology_generator.py +326 -0
- runbooks/vpc/unified_scenarios.py +1297 -1124
- runbooks/vpc/vpc_cleanup_integration.py +1943 -1115
- runbooks-1.1.5.dist-info/METADATA +328 -0
- {runbooks-1.1.4.dist-info → runbooks-1.1.5.dist-info}/RECORD +214 -193
- runbooks/finops/README.md +0 -414
- runbooks/finops/accuracy_cross_validator.py +0 -647
- runbooks/finops/business_cases.py +0 -950
- runbooks/finops/dashboard_router.py +0 -922
- runbooks/finops/ebs_optimizer.py +0 -973
- runbooks/finops/embedded_mcp_validator.py +0 -1629
- runbooks/finops/enhanced_dashboard_runner.py +0 -527
- runbooks/finops/finops_dashboard.py +0 -584
- runbooks/finops/finops_scenarios.py +0 -1218
- runbooks/finops/legacy_migration.py +0 -730
- runbooks/finops/multi_dashboard.py +0 -1519
- runbooks/finops/single_dashboard.py +0 -1113
- runbooks/finops/unlimited_scenarios.py +0 -393
- runbooks-1.1.4.dist-info/METADATA +0 -800
- {runbooks-1.1.4.dist-info → runbooks-1.1.5.dist-info}/WHEEL +0 -0
- {runbooks-1.1.4.dist-info → runbooks-1.1.5.dist-info}/entry_points.txt +0 -0
- {runbooks-1.1.4.dist-info → runbooks-1.1.5.dist-info}/licenses/LICENSE +0 -0
- {runbooks-1.1.4.dist-info → runbooks-1.1.5.dist-info}/top_level.txt +0 -0
runbooks/cloudops/cost_optimizer.py

@@ -29,20 +29,35 @@ from datetime import datetime, timedelta
 from dataclasses import dataclass
 
 from runbooks.common.rich_utils import (
-    console, …
-    …
+    console,
+    print_header,
+    print_success,
+    print_error,
+    print_warning,
+    print_info,
+    create_table,
+    create_progress_bar,
+    format_cost,
+    create_panel,
 )
 from runbooks.common.aws_pricing import get_service_monthly_cost, calculate_annual_cost, calculate_regional_cost
 from runbooks.common.env_utils import get_required_env_float
 from .base import CloudOpsBase
 from .models import (
-    CostOptimizationResult, …
-    …
+    CostOptimizationResult,
+    BusinessScenario,
+    ExecutionMode,
+    RiskLevel,
+    ResourceImpact,
+    BusinessMetrics,
+    ComplianceMetrics,
 )
 
+
 @dataclass
 class CostAnalysisData:
     """Internal data structure for cost analysis."""
+
     resource_id: str
     resource_type: str
     region: str
@@ -52,24 +67,25 @@ class CostAnalysisData:
     projected_savings: float
     risk_assessment: str
 
+
 class CostOptimizer(CloudOpsBase):
     """
     Cost optimization scenarios for emergency response and routine optimization.
-
+
     Business Use Cases:
     1. Emergency cost spike investigation and remediation
-    2. Routine cost optimization campaigns
+    2. Routine cost optimization campaigns
     3. Reserved instance planning and optimization
     4. Idle resource identification and cleanup
     5. Executive cost reporting and analysis
     """
-
+
     def __init__(
         self,
         profile: str = "default",
         dry_run: bool = True,
         execution_mode: ExecutionMode = ExecutionMode.DRY_RUN,
-        region: str = "us-east-1"
+        region: str = "us-east-1",
     ):
         """
         Initialize Cost Optimizer with enterprise patterns.
@@ -86,6 +102,7 @@ class CostOptimizer(CloudOpsBase):
         self.region = region
 
         from runbooks import __version__
+
         print_header("CloudOps Cost Optimizer", __version__)
         print_info(f"Execution mode: {execution_mode.value}")
         print_info(f"Profile: {profile}")
@@ -103,11 +120,12 @@
         Returns:
             Execution time in seconds
         """
-        if hasattr(self, …
+        if hasattr(self, "operation_start_time"):
             return time.time() - self.operation_start_time
         else:
             # Fallback if start time not tracked
             import time
+
             return time.time() - time.time()  # Returns ~0.0
 
     def _suggest_smaller_instance_type(self, instance_type: str) -> Optional[str]:
@@ -123,326 +141,329 @@
         # Simple rightsizing mapping - can be enhanced with CloudWatch metrics
         rightsizing_map = {
             # T3 family
-            …
-            …
-            …
-            …
+            "t3.large": "t3.medium",
+            "t3.xlarge": "t3.large",
+            "t3.2xlarge": "t3.xlarge",
             # M5 family
-            …
-            …
-            …
-            …
-            …
+            "m5.large": "m5.medium",
+            "m5.xlarge": "m5.large",
+            "m5.2xlarge": "m5.xlarge",
+            "m5.4xlarge": "m5.2xlarge",
             # C5 family
-            …
-            …
-            …
-            …
-            …
+            "c5.large": "c5.medium",
+            "c5.xlarge": "c5.large",
+            "c5.2xlarge": "c5.xlarge",
+            "c5.4xlarge": "c5.2xlarge",
             # R5 family
-            …
-            …
-            …
+            "r5.large": "r5.medium",
+            "r5.xlarge": "r5.large",
+            "r5.2xlarge": "r5.xlarge",
         }
 
         return rightsizing_map.get(instance_type)
 
     async def discover_infrastructure(
-        self,
-        regions: Optional[List[str]] = None,
-        services: Optional[List[str]] = None
+        self, regions: Optional[List[str]] = None, services: Optional[List[str]] = None
     ) -> Any:
         """
         Comprehensive infrastructure discovery for cost optimization analysis.
-
+
         Args:
             regions: AWS regions to analyze (default: common regions)
             services: AWS services to discover (default: cost-relevant services)
-
+
         Returns:
             Discovery result with resource counts and cost estimates
         """
         if regions is None:
-            regions = […
-            …
+            regions = ["us-east-1", "us-west-2", "eu-west-1", "ap-southeast-1"]
+
         if services is None:
-            services = […
-            …
-        discovery_data = {…
-            …
-            'service_summaries': [],
-            'estimated_total_cost': 0.0
-        }
-
+            services = ["ec2", "ebs", "s3", "rds", "vpc", "lambda"]
+
+        discovery_data = {"resources_analyzed": 0, "service_summaries": [], "estimated_total_cost": 0.0}
+
         print_info("🔍 Starting infrastructure discovery...")
-
+
         with create_progress_bar() as progress:
-            discovery_task = progress.add_task(…
-                …
-                total=len(services)
-            )
-
+            discovery_task = progress.add_task("[cyan]Discovering AWS resources...", total=len(services))
+
             for service in services:
-                service_summary = await self._discover_service_resources(…
-                …
-                …
-                discovery_data[…
-                …
-                discovery_data['estimated_total_cost'] += service_summary['estimated_cost']
-
+                service_summary = await self._discover_service_resources(service, regions)
+                discovery_data["service_summaries"].append(service_summary)
+                discovery_data["resources_analyzed"] += service_summary["resource_count"]
+                discovery_data["estimated_total_cost"] += service_summary["estimated_cost"]
+
                 progress.advance(discovery_task)
-
+
         print_success(f"Discovery completed: {discovery_data['resources_analyzed']} resources found")
-        return type(…
-
-    async def _discover_service_resources(
-        self,
-        service: str,
-        regions: List[str]
-    ) -> Dict[str, Any]:
+        return type("DiscoveryResult", (), discovery_data)
+
+    async def _discover_service_resources(self, service: str, regions: List[str]) -> Dict[str, Any]:
         """Discover resources for a specific AWS service."""
         try:
-            if service == …
+            if service == "ec2":
                 return await self._discover_ec2_resources(regions)
-            elif service == …
+            elif service == "ebs":
                 return await self._discover_ebs_resources(regions)
-            elif service == …
+            elif service == "s3":
                 return await self._discover_s3_resources()
-            elif service == …
+            elif service == "rds":
                 return await self._discover_rds_resources(regions)
-            elif service == …
+            elif service == "vpc":
                 return await self._discover_vpc_resources(regions)
             else:
                 # Generic discovery for other services
                 return {
-                    …
-                    …
-                    …
-                    …
+                    "service": service,
+                    "resource_count": 0,
+                    "estimated_cost": 0.0,
+                    "optimization_opportunities": [],
                 }
         except Exception as e:
             print_warning(f"Service {service} discovery failed: {str(e)}")
-            return {…
-                …
-                'resource_count': 0,
-                'estimated_cost': 0.0,
-                'error': str(e)
-            }
-
+            return {"service": service, "resource_count": 0, "estimated_cost": 0.0, "error": str(e)}
+
     async def _discover_ec2_resources(self, regions: List[str]) -> Dict[str, Any]:
         """Discover EC2 instances across regions."""
         total_instances = 0
         estimated_cost = 0.0
-
+
         for region in regions:
             try:
-                ec2 = self.session.client(…
+                ec2 = self.session.client("ec2", region_name=region)
                 response = ec2.describe_instances()
-
-                for reservation in response[…
-                    for instance in reservation[…
-                        if instance[…
+
+                for reservation in response["Reservations"]:
+                    for instance in reservation["Instances"]:
+                        if instance["State"]["Name"] in ["running", "stopped"]:
                             total_instances += 1
                             # Dynamic cost estimation
-                            instance_type = instance.get(…
+                            instance_type = instance.get("InstanceType", "t3.micro")
                             estimated_cost += self._estimate_ec2_cost(instance_type, region)
-
+
             except Exception as e:
                 print_warning(f"EC2 discovery failed in {region}: {str(e)}")
-
+
         return {
-            …
-            …
-            …
-            …
+            "service": "EC2",
+            "resource_count": total_instances,
+            "estimated_cost": estimated_cost,
+            "optimization_opportunities": ["rightsizing", "idle_detection", "reserved_instances"],
         }
-
+
     async def _discover_ebs_resources(self, regions: List[str]) -> Dict[str, Any]:
         """Discover EBS volumes across regions."""
         total_volumes = 0
         estimated_cost = 0.0
-
+
         for region in regions:
             try:
-                ec2 = self.session.client(…
+                ec2 = self.session.client("ec2", region_name=region)
                 response = ec2.describe_volumes()
-
-                for volume in response[…
+
+                for volume in response["Volumes"]:
                     total_volumes += 1
-                    volume_size = volume.get(…
-                    volume_type = volume.get(…
+                    volume_size = volume.get("Size", 0)
+                    volume_type = volume.get("VolumeType", "gp2")
                     estimated_cost += self._estimate_ebs_cost(volume_size, volume_type, region)
-
+
             except Exception as e:
                 print_warning(f"EBS discovery failed in {region}: {str(e)}")
-
+
         return {
-            …
-            …
-            …
-            …
+            "service": "EBS",
+            "resource_count": total_volumes,
+            "estimated_cost": estimated_cost,
+            "optimization_opportunities": ["unattached_volumes", "snapshot_cleanup", "storage_type_optimization"],
         }
-
+
     async def _discover_s3_resources(self) -> Dict[str, Any]:
         """Discover S3 buckets and estimate costs."""
         try:
-            s3 = self.session.client(…
+            s3 = self.session.client("s3")
             response = s3.list_buckets()
-
-            bucket_count = len(response[…
+
+            bucket_count = len(response["Buckets"])
             # S3 cost estimation - using standard storage baseline per bucket
             estimated_cost = bucket_count * get_service_monthly_cost("s3_standard", "us-east-1")
-
+
             return {
-                …
-                …
-                …
-                …
+                "service": "S3",
+                "resource_count": bucket_count,
+                "estimated_cost": estimated_cost,
+                "optimization_opportunities": [
+                    "lifecycle_policies",
+                    "storage_class_optimization",
+                    "request_optimization",
+                ],
             }
-
+
         except Exception as e:
             print_warning(f"S3 discovery failed: {str(e)}")
-            return {…
-                …
+            return {"service": "S3", "resource_count": 0, "estimated_cost": 0.0}
+
     async def _discover_rds_resources(self, regions: List[str]) -> Dict[str, Any]:
         """Discover RDS instances across regions."""
         total_instances = 0
         estimated_cost = 0.0
-
+
         for region in regions:
             try:
-                rds = self.session.client(…
+                rds = self.session.client("rds", region_name=region)
                 response = rds.describe_db_instances()
-
-                for instance in response[…
+
+                for instance in response["DBInstances"]:
                     total_instances += 1
-                    instance_class = instance.get(…
+                    instance_class = instance.get("DBInstanceClass", "db.t3.micro")
                     estimated_cost += self._estimate_rds_cost(instance_class, region)
-
+
             except Exception as e:
                 print_warning(f"RDS discovery failed in {region}: {str(e)}")
-
+
         return {
-            …
-            …
-            …
-            …
+            "service": "RDS",
+            "resource_count": total_instances,
+            "estimated_cost": estimated_cost,
+            "optimization_opportunities": ["instance_rightsizing", "reserved_instances", "storage_optimization"],
         }
-
+
     async def _discover_vpc_resources(self, regions: List[str]) -> Dict[str, Any]:
         """Discover VPC resources (NAT Gateways, EIPs, etc.)."""
         total_resources = 0
         estimated_cost = 0.0
-
+
         for region in regions:
             try:
-                ec2 = self.session.client(…
-                …
+                ec2 = self.session.client("ec2", region_name=region)
+
                 # NAT Gateways
                 nat_response = ec2.describe_nat_gateways()
-                nat_count = len(nat_response[…
+                nat_count = len(nat_response["NatGateways"])
                 total_resources += nat_count
                 estimated_cost += nat_count * get_service_monthly_cost("nat_gateway", region)
-
+
                 # Elastic IPs
                 eip_response = ec2.describe_addresses()
-                eip_count = len(eip_response[…
+                eip_count = len(eip_response["Addresses"])
                 total_resources += eip_count
                 estimated_cost += eip_count * get_service_monthly_cost("elastic_ip", region)
-
+
             except Exception as e:
                 print_warning(f"VPC discovery failed in {region}: {str(e)}")
-
+
         return {
-            …
-            …
-            …
-            …
+            "service": "VPC",
+            "resource_count": total_resources,
+            "estimated_cost": estimated_cost,
+            "optimization_opportunities": ["unused_nat_gateways", "unused_eips", "load_balancer_optimization"],
         }
-
+
     def _estimate_ec2_cost(self, instance_type: str, region: str = "us-east-1") -> float:
         """EC2 cost estimation using dynamic pricing with fallback."""
         try:
             # Map instance types to AWS pricing service keys
             # For simplicity, using a base cost multiplier approach
             base_cost = get_service_monthly_cost("ec2_instance", region)
-
+
             # Instance type multipliers based on AWS pricing patterns
             type_multipliers = {
-                …
-                …
-                …
-                …
+                "t3.nano": 0.1,
+                "t3.micro": 0.2,
+                "t3.small": 0.4,
+                "t3.medium": 0.8,
+                "t3.large": 1.6,
+                "t3.xlarge": 3.2,
+                "m5.large": 1.8,
+                "m5.xlarge": 3.6,
+                "m5.2xlarge": 7.2,
+                "c5.large": 1.6,
+                "c5.xlarge": 3.2,
+                "c5.2xlarge": 6.4,
             }
-
+
             multiplier = type_multipliers.get(instance_type, 1.0)
             return base_cost * multiplier
-
+
         except Exception:
             # Fallback to regional cost calculation if service key not available
             base_costs = {
-                …
-                …
-                …
-                …
+                "t3.nano": 3.8,
+                "t3.micro": 7.6,
+                "t3.small": 15.2,
+                "t3.medium": 30.4,
+                "t3.large": 60.8,
+                "t3.xlarge": 121.6,
+                "m5.large": 70.1,
+                "m5.xlarge": 140.2,
+                "m5.2xlarge": 280.3,
+                "c5.large": 62.1,
+                "c5.xlarge": 124.2,
+                "c5.2xlarge": 248.4,
             }
             base_cost = base_costs.get(instance_type, 50.0)
             return calculate_regional_cost(base_cost, region)
-
+
     def _estimate_ebs_cost(self, size_gb: int, volume_type: str, region: str = "us-east-1") -> float:
         """EBS cost estimation using dynamic pricing."""
         try:
             # Map volume types to service keys in our pricing engine
             volume_service_map = {
-                …
-                …
-                …
-                …
-                …
-                …
+                "gp2": "ebs_gp2",
+                "gp3": "ebs_gp3",
+                "io1": "ebs_io1",
+                "io2": "ebs_io2",
+                "sc1": "ebs_sc1",
+                "st1": "ebs_st1",
             }
-
-            service_key = volume_service_map.get(volume_type, …
+
+            service_key = volume_service_map.get(volume_type, "ebs_gp2")  # Default to gp2
             cost_per_gb = get_service_monthly_cost(service_key, region)
             return size_gb * cost_per_gb
-
+
         except Exception:
             # Fallback to regional cost calculation
-            cost_per_gb_base = {
-                'gp2': 0.10, 'gp3': 0.08, 'io1': 0.125, 'io2': 0.125, 'sc1': 0.025, 'st1': 0.045
-            }
+            cost_per_gb_base = {"gp2": 0.10, "gp3": 0.08, "io1": 0.125, "io2": 0.125, "sc1": 0.025, "st1": 0.045}
             base_cost_per_gb = cost_per_gb_base.get(volume_type, 0.10)
             regional_cost_per_gb = calculate_regional_cost(base_cost_per_gb, region)
             return size_gb * regional_cost_per_gb
-
+
     def _estimate_rds_cost(self, instance_class: str, region: str = "us-east-1") -> float:
         """RDS cost estimation using dynamic pricing with fallback."""
         try:
             # Use RDS snapshot pricing as a baseline, then apply instance multipliers
             base_cost = get_service_monthly_cost("rds_snapshot", region)
-
+
             # Instance class multipliers based on AWS RDS pricing patterns
             class_multipliers = {
-                …
-                …
+                "db.t3.micro": 1.0,
+                "db.t3.small": 2.0,
+                "db.t3.medium": 4.0,
+                "db.m5.large": 9.6,
+                "db.m5.xlarge": 19.2,
+                "db.m5.2xlarge": 38.4,
             }
-
+
             multiplier = class_multipliers.get(instance_class, 6.8)  # Reasonable default multiplier
             return base_cost * multiplier
-
+
         except Exception:
             # Fallback to regional cost calculation
             base_costs = {
-                …
-                …
+                "db.t3.micro": 14.6,
+                "db.t3.small": 29.2,
+                "db.t3.medium": 58.4,
+                "db.m5.large": 140.2,
+                "db.m5.xlarge": 280.3,
+                "db.m5.2xlarge": 560.6,
             }
             base_cost = base_costs.get(instance_class, 100.0)
             return calculate_regional_cost(base_cost, region)
-
+
     async def analyze_ec2_rightsizing(self) -> Dict[str, Any]:
         """Analyze EC2 instances for rightsizing opportunities."""
         print_info("🔍 Analyzing EC2 rightsizing opportunities...")
-
+
         # Real AWS integration for rightsizing analysis
         from runbooks.common.aws_pricing import get_aws_pricing_engine, get_ec2_monthly_cost
 
@@ -450,18 +471,18 @@ class CostOptimizer(CloudOpsBase):
             pricing_engine = get_aws_pricing_engine(profile=self.profile)
 
             # Get actual EC2 instances from AWS API
-            ec2_client = self.session.client(…
+            ec2_client = self.session.client("ec2")
             response = ec2_client.describe_instances()
 
             instances_analyzed = 0
             oversized_instances = 0
             potential_monthly_savings = 0.0
 
-            for reservation in response[…
-                for instance in reservation[…
-                    if instance[…
+            for reservation in response["Reservations"]:
+                for instance in reservation["Instances"]:
+                    if instance["State"]["Name"] in ["running", "stopped"]:
                         instances_analyzed += 1
-                        instance_type = instance[…
+                        instance_type = instance["InstanceType"]
 
                         # Calculate potential savings from rightsizing
                         current_cost = get_ec2_monthly_cost(instance_type, self.region, self.profile)
@@ -472,27 +493,27 @@ class CostOptimizer(CloudOpsBase):
                         smaller_cost = get_ec2_monthly_cost(smaller_instance, self.region, self.profile)
                         if smaller_cost < current_cost:
                             oversized_instances += 1
-                            potential_monthly_savings += …
+                            potential_monthly_savings += current_cost - smaller_cost
 
             return {
-                …
-                …
-                …
-                …
-                …
+                "instances_analyzed": instances_analyzed,
+                "oversized_instances": oversized_instances,
+                "potential_savings": round(potential_monthly_savings, 2),
+                "resources_analyzed": instances_analyzed,
+                "resource_impacts": [],
             }
 
         except Exception as e:
             print_warning(f"Could not get real EC2 data: {e}")
             # Return minimal fallback
             return {
-                …
-                …
-                …
-                …
-                …
+                "instances_analyzed": 0,
+                "oversized_instances": 0,
+                "potential_savings": 0.0,
+                "resources_analyzed": 0,
+                "resource_impacts": [],
             }
-
+
     async def analyze_ebs_optimization(self) -> Dict[str, Any]:
         """Analyze EBS volumes for optimization opportunities."""
         print_info("🔍 Analyzing EBS optimization opportunities...")
@@ -502,47 +523,47 @@ class CostOptimizer(CloudOpsBase):
 
         try:
             # Get actual EBS volumes from AWS API
-            ec2_client = self.session.client(…
+            ec2_client = self.session.client("ec2")
             response = ec2_client.describe_volumes()
 
-            volumes_analyzed = len(response[…
+            volumes_analyzed = len(response["Volumes"])
             unattached_volumes = 0
             oversized_volumes = 0
             potential_monthly_savings = 0.0
 
-            for volume in response[…
+            for volume in response["Volumes"]:
                 # Count unattached volumes
-                if volume[…
+                if volume["State"] == "available":
                     unattached_volumes += 1
-                    volume_size = volume[…
-                    volume_type = volume.get(…
+                    volume_size = volume["Size"]
+                    volume_type = volume.get("VolumeType", "gp3")
                     cost_per_gb = get_ebs_gb_monthly_cost(volume_type, self.region, self.profile)
-                    potential_monthly_savings += …
+                    potential_monthly_savings += volume_size * cost_per_gb
 
                 # Identify potentially oversized volumes (basic heuristic)
-                elif volume[…
+                elif volume["State"] == "in-use" and volume["Size"] > 100:
                     oversized_volumes += 1
 
             return {
-                …
-                …
-                …
-                …
-                …
-                …
+                "volumes_analyzed": volumes_analyzed,
+                "unattached_volumes": unattached_volumes,
+                "oversized_volumes": oversized_volumes,
+                "potential_savings": round(potential_monthly_savings, 2),
+                "resources_analyzed": volumes_analyzed,
+                "resource_impacts": [],
             }
 
         except Exception as e:
             print_warning(f"Could not get real EBS data: {e}")
             return {
-                …
-                …
-                …
-                …
-                …
-                …
+                "volumes_analyzed": 0,
+                "unattached_volumes": 0,
+                "oversized_volumes": 0,
+                "potential_savings": 0.0,
+                "resources_analyzed": 0,
+                "resource_impacts": [],
             }
-
+
     async def analyze_unused_resources(self) -> Dict[str, Any]:
         """Analyze and identify unused AWS resources."""
         print_info("🔍 Analyzing unused resources...")
@@ -551,55 +572,60 @@ class CostOptimizer(CloudOpsBase):
         from runbooks.common.aws_pricing import get_eip_monthly_cost, get_ebs_gb_monthly_cost
 
         try:
-            ec2_client = self.session.client(…
+            ec2_client = self.session.client("ec2")
 
             # Analyze unused Elastic IPs
             eips_response = ec2_client.describe_addresses()
-            eip_unused = len([eip for eip in eips_response[…
+            eip_unused = len([eip for eip in eips_response["Addresses"] if "AssociationId" not in eip])
 
             # Analyze unattached volumes (already calculated in EBS optimization)
             volumes_response = ec2_client.describe_volumes()
-            volumes_unattached = len([vol for vol in volumes_response[…
+            volumes_unattached = len([vol for vol in volumes_response["Volumes"] if vol["State"] == "available"])
 
             # Analyze old snapshots (older than 30 days)
             from datetime import datetime, timedelta
+
             cutoff_date = datetime.now() - timedelta(days=30)
-            snapshots_response = ec2_client.describe_snapshots(OwnerIds=[…
-            snapshots_old = len(…
-                …
-                …
-                …
+            snapshots_response = ec2_client.describe_snapshots(OwnerIds=["self"])
+            snapshots_old = len(
+                [
+                    snap
+                    for snap in snapshots_response["Snapshots"]
+                    if datetime.fromisoformat(snap["StartTime"].replace("Z", "+00:00")).replace(tzinfo=None)
+                    < cutoff_date
+                ]
+            )
 
             # Calculate potential savings
             eip_monthly_cost = get_eip_monthly_cost(self.region, self.profile)
             potential_eip_savings = eip_unused * eip_monthly_cost
 
             # Estimate EBS snapshot costs (minimal but accumulated)
-            ebs_cost_per_gb = get_ebs_gb_monthly_cost(…
+            ebs_cost_per_gb = get_ebs_gb_monthly_cost("gp3", self.region, self.profile)
             estimated_snapshot_savings = snapshots_old * 5.0 * ebs_cost_per_gb  # Assume 5GB average per snapshot
 
             total_potential_savings = potential_eip_savings + estimated_snapshot_savings
 
             return {
-                …
-                …
-                …
-                …
-                …
-                …
+                "eip_unused": eip_unused,
+                "volumes_unattached": volumes_unattached,
+                "snapshots_old": snapshots_old,
+                "potential_savings": round(total_potential_savings, 2),
+                "resources_analyzed": eip_unused + volumes_unattached + snapshots_old,
+                "resource_impacts": [],
             }
 
         except Exception as e:
             print_warning(f"Could not get real unused resources data: {e}")
             return {
-                …
-                …
-                …
-                …
-                …
-                …
+                "eip_unused": 0,
+                "volumes_unattached": 0,
+                "snapshots_old": 0,
+                "potential_savings": 0.0,
+                "resources_analyzed": 0,
+                "resource_impacts": [],
             }
-
+
     async def analyze_s3_optimization(self) -> Dict[str, Any]:
         """Analyze S3 buckets for storage class optimization using real AWS data."""
         print_info("🔍 Analyzing S3 optimization opportunities...")
@@ -611,11 +637,11 @@ class CostOptimizer(CloudOpsBase):
         resource_impacts = []
 
         try:
-            s3_client = self.session.client(…
+            s3_client = self.session.client("s3")
 
             # Get all S3 buckets
             response = s3_client.list_buckets()
-            all_buckets = response.get(…
+            all_buckets = response.get("Buckets", [])
             buckets_analyzed = len(all_buckets)
 
             print_info(f"Found {buckets_analyzed} S3 buckets for analysis")
@@ -625,12 +651,12 @@ class CostOptimizer(CloudOpsBase):
                 task = progress.add_task("[cyan]Analyzing S3 buckets...", total=len(all_buckets))
 
                 for bucket in all_buckets:
-                    bucket_name = bucket[…
+                    bucket_name = bucket["Name"]
 
                     try:
                         # Check bucket region to create regional client
                         bucket_region = await self._get_bucket_region(s3_client, bucket_name)
-                        regional_s3 = self.session.client(…
+                        regional_s3 = self.session.client("s3", region_name=bucket_region)
 
                         # Analyze lifecycle configuration
                         lifecycle_needed = await self._analyze_bucket_lifecycle(regional_s3, bucket_name)
@@ -639,9 +665,9 @@ class CostOptimizer(CloudOpsBase):
 
                         # Analyze storage class optimization
                         storage_optimization = await self._analyze_bucket_storage_classes(regional_s3, bucket_name)
-                        if storage_optimization[…
+                        if storage_optimization["has_optimization_opportunity"]:
                             storage_class_optimization += 1
-                            potential_savings += storage_optimization[…
+                            potential_savings += storage_optimization["estimated_monthly_savings"]
 
                         # Create resource impact for this bucket
                         resource_impacts.append(
@@ -650,12 +676,12 @@ class CostOptimizer(CloudOpsBase):
                                 resource_id=bucket_name,
                                 region=bucket_region,
                                 account_id=self.account_id,
-                                estimated_monthly_cost=storage_optimization[…
-                                projected_savings=storage_optimization[…
+                                estimated_monthly_cost=storage_optimization["current_cost"],
+                                projected_savings=storage_optimization["estimated_monthly_savings"],
                                 risk_level=RiskLevel.LOW,
                                 modification_required=True,
                                 resource_name=f"S3 Bucket {bucket_name}",
-                                estimated_downtime=0.0
+                                estimated_downtime=0.0,
                             )
                         )
 
@@ -681,29 +707,29 @@ class CostOptimizer(CloudOpsBase):
             potential_savings = 0.0
 
         return {
-            …
-            …
-            …
-            …
-            …
-            …
+            "buckets_analyzed": buckets_analyzed,
+            "lifecycle_opportunities": lifecycle_opportunities,
+            "storage_class_optimization": storage_class_optimization,
+            "potential_savings": potential_savings,
+            "resources_analyzed": buckets_analyzed,
+            "resource_impacts": resource_impacts,
         }
 
     async def _get_bucket_region(self, s3_client, bucket_name: str) -> str:
         """Get the region for a specific S3 bucket."""
         try:
             response = s3_client.get_bucket_location(Bucket=bucket_name)
-            region = response.get(…
+            region = response.get("LocationConstraint")
 
             # Handle special case for US East 1
             if region is None:
-                return …
+                return "us-east-1"
 
             return region
 
         except Exception as e:
             print_warning(f"Could not determine region for bucket {bucket_name}: {str(e)}")
-            return …
+            return "us-east-1"  # Default fallback
 
     async def _analyze_bucket_lifecycle(self, s3_client, bucket_name: str) -> bool:
         """
@@ -718,7 +744,7 @@ class CostOptimizer(CloudOpsBase):
             # If lifecycle exists, assume it's already optimized
             return False
         except ClientError as e:
-            if e.response[…
+            if e.response["Error"]["Code"] == "NoSuchLifecycleConfiguration":
                 # No lifecycle policy exists - could benefit from one
                 pass
             else:
@@ -727,16 +753,16 @@ class CostOptimizer(CloudOpsBase):
 
         # Check bucket size and object count to determine if lifecycle is beneficial
         try:
-            paginator = s3_client.get_paginator(…
-            page_iterator = paginator.paginate(Bucket=bucket_name, PaginationConfig={…
+            paginator = s3_client.get_paginator("list_objects_v2")
+            page_iterator = paginator.paginate(Bucket=bucket_name, PaginationConfig={"MaxItems": 100})
 
             object_count = 0
             total_size = 0
 
             for page in page_iterator:
-                if …
-                    object_count += len(page[…
-                    total_size += sum(obj.get(…
+                if "Contents" in page:
+                    object_count += len(page["Contents"])
+                    total_size += sum(obj.get("Size", 0) for obj in page["Contents"])
 
             # Recommend lifecycle if bucket has significant content
             # and could benefit from automatic transitions
@@ -760,138 +786,131 @@ class CostOptimizer(CloudOpsBase):
         """
         try:
             # Get storage class analytics if available
-            paginator = s3_client.get_paginator(…
-            page_iterator = paginator.paginate(Bucket=bucket_name, PaginationConfig={…
+            paginator = s3_client.get_paginator("list_objects_v2")
+            page_iterator = paginator.paginate(Bucket=bucket_name, PaginationConfig={"MaxItems": 1000})
 
             storage_analysis = {
-                …
-                …
-                …
-                …
-                …
-                …
-                …
-                …
+                "standard_objects": 0,
+                "standard_size": 0,
+                "infrequent_access_candidates": 0,
+                "archive_candidates": 0,
+                "current_cost": 0.0,
+                "optimized_cost": 0.0,
+                "has_optimization_opportunity": False,
+                "estimated_monthly_savings": 0.0,
             }
 
             current_time = datetime.now()
 
             for page in page_iterator:
-                if …
+                if "Contents" not in page:
                     continue
 
-                for obj in page[…
-                    size_gb = obj.get(…
-                    last_modified = obj.get(…
+                for obj in page["Contents"]:
+                    size_gb = obj.get("Size", 0) / (1024 * 1024 * 1024)  # Convert to GB
+                    last_modified = obj.get("LastModified", current_time)
 
                     # Calculate age of object
-                    if hasattr(last_modified, …
+                    if hasattr(last_modified, "replace"):
                         age_days = (current_time - last_modified.replace(tzinfo=None)).days
                     else:
                         age_days = 0
 
-                    storage_class = obj.get(…
+                    storage_class = obj.get("StorageClass", "STANDARD")
 
                     # Analyze optimization opportunities
-                    if storage_class == …
-                        storage_analysis[…
-                        storage_analysis[…
+                    if storage_class == "STANDARD":
+                        storage_analysis["standard_objects"] += 1
+                        storage_analysis["standard_size"] += size_gb
 
                         # Current cost (Standard storage ~$0.023/GB/month)
                         standard_cost = size_gb * 0.023
-                        storage_analysis[…
+                        storage_analysis["current_cost"] += standard_cost
 
                         # Check if object could be moved to cheaper storage class
                         if age_days > 30 and size_gb > 0.1:  # Objects older than 30 days and >100MB
-                            storage_analysis[…
+                            storage_analysis["infrequent_access_candidates"] += 1
                             # IA storage ~$0.0125/GB/month
                             ia_cost = size_gb * 0.0125
-                            storage_analysis[…
+                            storage_analysis["optimized_cost"] += ia_cost
                         elif age_days > 90 and size_gb > 0.05:  # Objects older than 90 days
-                            storage_analysis[…
+                            storage_analysis["archive_candidates"] += 1
                             # Glacier ~$0.004/GB/month
                             glacier_cost = size_gb * 0.004
-                            storage_analysis[…
+                            storage_analysis["optimized_cost"] += glacier_cost
                         else:
                             # No optimization for this object
-                            storage_analysis[…
+                            storage_analysis["optimized_cost"] += standard_cost
 
             # Calculate potential savings
-            potential_savings = storage_analysis[…
+            potential_savings = storage_analysis["current_cost"] - storage_analysis["optimized_cost"]
 
             if potential_savings > 1.0:  # Minimum $1/month savings to be worth it
-                storage_analysis[…
-                storage_analysis[…
+                storage_analysis["has_optimization_opportunity"] = True
+                storage_analysis["estimated_monthly_savings"] = potential_savings
 
             return storage_analysis
 
         except Exception as e:
             print_warning(f"Could not analyze storage classes for {bucket_name}: {str(e)}")
-            return {
-                'has_optimization_opportunity': False,
-                'estimated_monthly_savings': 0.0,
-                'current_cost': 0.0
-            }
+            return {"has_optimization_opportunity": False, "estimated_monthly_savings": 0.0, "current_cost": 0.0}
 
     async def optimize_nat_gateways(
-        self,
-        regions: Optional[List[str]] = None,
-        idle_threshold_days: int = 7,
-        cost_threshold: float = 0.0
+        self, regions: Optional[List[str]] = None, idle_threshold_days: int = 7, cost_threshold: float = 0.0
     ) -> CostOptimizationResult:
         """
         Business Scenario: Delete unused NAT Gateways
         Source: AWS_Delete_Unused_NAT_Gateways.ipynb
-
+
         Typical Business Impact:
         - Cost savings: significant value range/month per unused NAT Gateway
        - Risk level: Low (network connectivity analysis performed)
         - Implementation time: 15-30 minutes
-
+
         Args:
             regions: Target regions (default: all available)
             idle_threshold_days: Days to consider NAT Gateway idle
             cost_threshold: Minimum monthly cost to consider for optimization
-
+
         Returns:
             CostOptimizationResult with detailed savings and impact analysis
         """
         operation_name = "NAT Gateway Cost Optimization"
         print_header(f"🔍 {operation_name}")
-
+
         # Initialize result tracking
         unused_gateways = []
         total_current_cost = 0.0
         total_projected_savings = 0.0
-
+
         # Get target regions
-        target_regions = regions or self._get_available_regions(…
-        …
+        target_regions = regions or self._get_available_regions("ec2")[:5]  # Limit for performance
+
         print_info(f"Analyzing NAT Gateways across {len(target_regions)} regions")
         print_info(f"Idle threshold: {idle_threshold_days} days")
-
+
         # Progress tracking
         with create_progress_bar() as progress:
             task = progress.add_task("[cyan]Scanning NAT Gateways...", total=len(target_regions))
-
+
             for region in target_regions:
                 try:
                     region_gateways = await self._analyze_nat_gateways_in_region(
                         region, idle_threshold_days, cost_threshold
                     )
                     unused_gateways.extend(region_gateways)
-
+
                     progress.update(task, advance=1)
-
+
                 except Exception as e:
                     print_warning(f"Could not analyze region {region}: {str(e)}")
                     continue
-
+
         # Calculate total impact
         for gateway in unused_gateways:
             total_current_cost += gateway.estimated_monthly_cost or 0
             total_projected_savings += gateway.projected_savings or 0
-
+
         # Create resource impacts
         resource_impacts = [
             self.create_resource_impact(
@@ -903,23 +922,23 @@ class CostOptimizer(CloudOpsBase):
                 risk_level=RiskLevel.LOW,  # NAT Gateway deletion is typically low risk
                 modification_required=True,
                 resource_name=f"NAT Gateway {gateway.resource_id}",
-                estimated_downtime=0.0  # NAT Gateway deletion has no downtime impact
+                estimated_downtime=0.0,  # NAT Gateway deletion has no downtime impact
             )
             for gateway in unused_gateways
         ]
-
+
         # Business impact analysis
         business_metrics = self.create_business_metrics(
             total_savings=total_projected_savings,
             implementation_cost=0.0,  # No implementation cost for deletion
-            overall_risk=RiskLevel.LOW
+            overall_risk=RiskLevel.LOW,
         )
-
+
         # Executive summary display
         if unused_gateways:
             print_success(f"💰 Found {len(unused_gateways)} unused NAT Gateways")
             print_success(f"💵 Potential monthly savings: {format_cost(total_projected_savings)}")
-
+
             # Detailed table
             nat_table = create_table(
                 title="Unused NAT Gateway Analysis",
@@ -928,27 +947,27 @@ class CostOptimizer(CloudOpsBase):
                     {"name": "Region", "style": "green"},
                     {"name": "Monthly Cost", "style": "cost"},
                     {"name": "Last Activity", "style": "yellow"},
-                    {"name": "Risk Level", "style": "blue"}
-                ]
+                    {"name": "Risk Level", "style": "blue"},
+                ],
             )
-
+
             for gateway in unused_gateways[:10]:  # Show top 10 for readability
                 nat_table.add_row(
                     gateway.resource_id,
                     gateway.region,
                     format_cost(gateway.estimated_monthly_cost or 0),
                     f"{idle_threshold_days}+ days ago",
-                    gateway.risk_level.value.title()
+                    gateway.risk_level.value.title(),
                 )
-
+
             console.print(nat_table)
-
+
             if not self.dry_run and self.execution_mode == ExecutionMode.EXECUTE:
                 print_warning("⚡ Executing NAT Gateway deletion...")
                 await self._execute_nat_gateway_deletion(unused_gateways)
         else:
             print_info("✅ No unused NAT Gateways found - infrastructure is optimized")
-
+
         # Create comprehensive result
         result = CostOptimizationResult(
             scenario=BusinessScenario.COST_OPTIMIZATION,
@@ -964,70 +983,64 @@ class CostOptimizer(CloudOpsBase):
             recommendations=[
                 "Set up CloudWatch alarms for NAT Gateway utilization monitoring",
                 "Consider VPC Endpoints to reduce NAT Gateway dependencies",
-                "Review network architecture for optimization opportunities"
+                "Review network architecture for optimization opportunities",
             ],
             aws_profile_used=self.profile,
             regions_analyzed=target_regions,
             services_analyzed=["ec2", "cloudwatch"],
-
             # Cost-specific metrics
             current_monthly_spend=total_current_cost,
             optimized_monthly_spend=total_current_cost - total_projected_savings,
             savings_percentage=(total_projected_savings / total_current_cost * 100) if total_current_cost > 0 else 0,
             idle_resources=resource_impacts,
             oversized_resources=[],
-            unattached_resources=[]
+            unattached_resources=[],
         )
-
+
         self.display_execution_summary(result)
         return result
-
+
     async def _analyze_nat_gateways_in_region(
-        self,
-        region: str,
-        idle_threshold_days: int,
-        cost_threshold: float
+        self, region: str, idle_threshold_days: int, cost_threshold: float
     ) -> List[ResourceImpact]:
         """
         Analyze NAT Gateways in a specific region for optimization opportunities.
-
+
         Args:
             region: AWS region to analyze
             idle_threshold_days: Days to consider idle
             cost_threshold: Minimum cost threshold
-
+
         Returns:
             List of unused NAT Gateway ResourceImpacts
         """
         unused_gateways = []
-
+
         try:
-            ec2 = self.session.client(…
-            cloudwatch = self.session.client(…
-
+            ec2 = self.session.client("ec2", region_name=region)
+            cloudwatch = self.session.client("cloudwatch", region_name=region)
+
             # Get all NAT Gateways in region
             response = ec2.describe_nat_gateways()
-
-            for nat_gateway in response.get(…
-                gateway_id = nat_gateway[…
-                state = nat_gateway[…
-
+
+            for nat_gateway in response.get("NatGateways", []):
+                gateway_id = nat_gateway["NatGatewayId"]
+                state = nat_gateway["State"]
+
                 # Only analyze available gateways
-                if state != …
+                if state != "available":
                     continue
-
+
                 # Check utilization over the threshold period
-                is_unused = await self._check_nat_gateway_utilization(…
-                    …
-                )
-
+                is_unused = await self._check_nat_gateway_utilization(cloudwatch, gateway_id, idle_threshold_days)
+
                 if is_unused:
                     # Estimate cost using dynamic pricing
                     estimated_cost = get_service_monthly_cost("nat_gateway", region)
-
+
                     # Add data processing costs if available
                     # (This would require more detailed Cost Explorer integration)
-
+
                     if estimated_cost >= cost_threshold:
                         unused_gateway = ResourceImpact(
                             resource_type="nat-gateway",
@@ -1039,78 +1052,71 @@ class CostOptimizer(CloudOpsBase):
|
|
1039
1052
|
risk_level=RiskLevel.LOW,
|
1040
1053
|
modification_required=True,
|
1041
1054
|
resource_name=f"NAT Gateway {gateway_id}",
|
1042
|
-
estimated_downtime=0.0
|
1055
|
+
estimated_downtime=0.0,
|
1043
1056
|
)
|
1044
1057
|
unused_gateways.append(unused_gateway)
|
1045
|
-
|
1058
|
+
|
1046
1059
|
except ClientError as e:
|
1047
1060
|
print_warning(f"Could not analyze NAT Gateways in {region}: {str(e)}")
|
1048
|
-
|
1061
|
+
|
1049
1062
|
return unused_gateways
|
1050
|
-
|
1051
|
-
async def _check_nat_gateway_utilization(
|
1052
|
-
self,
|
1053
|
-
cloudwatch_client,
|
1054
|
-
gateway_id: str,
|
1055
|
-
days: int
|
1056
|
-
) -> bool:
|
1063
|
+
|
1064
|
+
async def _check_nat_gateway_utilization(self, cloudwatch_client, gateway_id: str, days: int) -> bool:
|
1057
1065
|
"""
|
1058
1066
|
Check if NAT Gateway has been idle based on CloudWatch metrics.
|
1059
|
-
|
1067
|
+
|
1060
1068
|
Args:
|
1061
1069
|
cloudwatch_client: CloudWatch client for the region
|
1062
1070
|
gateway_id: NAT Gateway ID
|
1063
1071
|
days: Number of days to check
|
1064
|
-
|
1072
|
+
|
1065
1073
|
Returns:
|
1066
1074
|
True if NAT Gateway appears unused, False otherwise
|
1067
1075
|
"""
|
1068
1076
|
try:
|
1069
1077
|
end_time = datetime.utcnow()
|
1070
1078
|
start_time = end_time - timedelta(days=days)
|
1071
|
-
|
1079
|
+
|
1072
1080
|
# Check bytes transferred metric
|
1073
1081
|
response = cloudwatch_client.get_metric_statistics(
|
1074
|
-
Namespace=
|
1075
|
-
MetricName=
|
1076
|
-
Dimensions=[
|
1077
|
-
{'Name': 'NatGatewayId', 'Value': gateway_id}
|
1078
|
-
],
|
1082
|
+
Namespace="AWS/NatGateway",
|
1083
|
+
MetricName="BytesInFromDestination",
|
1084
|
+
Dimensions=[{"Name": "NatGatewayId", "Value": gateway_id}],
|
1079
1085
|
StartTime=start_time,
|
1080
1086
|
EndTime=end_time,
|
1081
1087
|
Period=86400, # Daily
|
1082
|
-
Statistics=[
|
1088
|
+
Statistics=["Sum"],
|
1083
1089
|
)
|
1084
|
-
|
1090
|
+
|
1085
1091
|
# If no metrics or very low usage, consider unused
|
1086
|
-
datapoints = response.get(
|
1092
|
+
datapoints = response.get("Datapoints", [])
|
1087
1093
|
if not datapoints:
|
1088
1094
|
return True
|
1089
|
-
|
1095
|
+
|
1090
1096
|
# Calculate total bytes over period
|
1091
|
-
total_bytes = sum(dp[
|
1092
|
-
|
1097
|
+
total_bytes = sum(dp["Sum"] for dp in datapoints)
|
1098
|
+
|
1093
1099
|
# Consider unused if less than 100MB over the entire period
|
1094
1100
|
usage_threshold = 100 * 1024 * 1024 # 100MB
|
1095
1101
|
return total_bytes < usage_threshold
|
1096
|
-
|
1102
|
+
|
1097
1103
|
except Exception:
|
1098
1104
|
# If we can't get metrics, assume it's in use (safe approach)
|
1099
1105
|
return False
|
1100
|
-
|
1106
|
+
|
1101
1107
|
async def _execute_nat_gateway_deletion(self, unused_gateways: List[ResourceImpact]) -> None:
|
1102
1108
|
"""
|
1103
1109
|
Execute NAT Gateway deletion for confirmed unused gateways.
|
1104
|
-
|
1110
|
+
|
1105
1111
|
Args:
|
1106
1112
|
unused_gateways: List of confirmed unused NAT Gateways
|
1107
1113
|
"""
|
1108
1114
|
if self.dry_run:
|
1109
1115
|
print_info("DRY RUN: Would delete NAT Gateways")
|
1110
1116
|
return
|
1111
|
-
|
1117
|
+
|
1112
1118
|
print_warning("🚨 EXECUTING NAT Gateway deletions - this action cannot be undone!")
|
1113
|
-
|
1119
|
+
|
1114
1120
|
# Group by region for efficient processing
|
1115
1121
|
gateways_by_region = {}
|
1116
1122
|
for gateway in unused_gateways:
|
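The idle check in this hunk samples a single metric, BytesInFromDestination. A gateway whose traffic is mostly outbound could read as idle under that one series, so a stricter standalone variant sums both directions before applying the same 100MB threshold. The sketch below is illustrative rather than part of the package; it assumes a plain boto3 CloudWatch client and reuses the namespace, period, and threshold from the diff.

```python
from datetime import datetime, timedelta

import boto3


def nat_gateway_is_idle(gateway_id: str, region: str, days: int = 7) -> bool:
    """Return True when traffic in BOTH directions stays under 100MB for `days`."""
    cloudwatch = boto3.client("cloudwatch", region_name=region)
    end_time = datetime.utcnow()
    start_time = end_time - timedelta(days=days)

    total_bytes = 0.0
    # Sum traffic toward destinations and traffic returning from them.
    for metric in ("BytesInFromDestination", "BytesOutToDestination"):
        response = cloudwatch.get_metric_statistics(
            Namespace="AWS/NatGateway",
            MetricName=metric,
            Dimensions=[{"Name": "NatGatewayId", "Value": gateway_id}],
            StartTime=start_time,
            EndTime=end_time,
            Period=86400,  # one datapoint per day
            Statistics=["Sum"],
        )
        total_bytes += sum(dp["Sum"] for dp in response.get("Datapoints", []))

    return total_bytes < 100 * 1024 * 1024  # same 100MB threshold as the diff
```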
@@ -1118,68 +1124,68 @@ class CostOptimizer(CloudOpsBase):
             if region not in gateways_by_region:
                 gateways_by_region[region] = []
             gateways_by_region[region].append(gateway)
-
+
         for region, gateways in gateways_by_region.items():
             try:
-                ec2 = self.session.client('ec2', region_name=region)
-
+                ec2 = self.session.client("ec2", region_name=region)
+
                 for gateway in gateways:
                     try:
                         ec2.delete_nat_gateway(NatGatewayId=gateway.resource_id)
                         print_success(f"✅ Deleted NAT Gateway {gateway.resource_id} in {region}")
-
+
                     except ClientError as e:
                         print_error(f"❌ Failed to delete {gateway.resource_id}: {str(e)}")
-
+
             except Exception as e:
                 print_error(f"❌ Failed to process region {region}: {str(e)}")
-
+
     async def optimize_idle_ec2_instances(
         self,
         regions: Optional[List[str]] = None,
         cpu_threshold: float = 5.0,
         duration_hours: int = 168,  # 7 days
-        cost_threshold: float = None
+        cost_threshold: float = None,
     ) -> CostOptimizationResult:
         """
         Business Scenario: Stop idle EC2 instances
         Source: AWS_Stop_Idle_EC2_Instances.ipynb
-
+
         Typical Business Impact:
         - Cost savings: 20-60% on compute costs
         - Risk level: Medium (requires application impact analysis)
         - Implementation time: 30-60 minutes
-
+
         Args:
             regions: Target regions for analysis
             cpu_threshold: CPU utilization threshold (%)
             duration_hours: Analysis period in hours
             cost_threshold: Minimum monthly cost to consider
-
+
         Returns:
             CostOptimizationResult with idle instance analysis
         """
         operation_name = "Idle EC2 Instance Optimization"
         print_header(f"📊 {operation_name}")
-
+
         # Implementation follows similar pattern to NAT Gateway optimization
         # This would integrate the logic from AWS_Stop_Idle_EC2_Instances.ipynb
-
+
         # Set dynamic cost threshold if not provided - NO hardcoded defaults
         if cost_threshold is None:
-            cost_threshold = get_required_env_float('EC2_COST_THRESHOLD')
-
+            cost_threshold = get_required_env_float("EC2_COST_THRESHOLD")
+
         print_info(f"Analyzing EC2 instances with <{cpu_threshold}% CPU utilization")
         print_info(f"Analysis period: {duration_hours} hours")
         print_info(f"Minimum cost threshold: ${cost_threshold}/month")
-
+
         # Placeholder for detailed implementation
         # In production, this would:
         # 1. Query CloudWatch for EC2 CPU metrics
         # 2. Identify instances below threshold
         # 3. Calculate cost impact
         # 4. Generate business recommendations
-
+
         return CostOptimizationResult(
             scenario=BusinessScenario.COST_OPTIMIZATION,
             scenario_name="Idle EC2 Instance Optimization",
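The placeholder above enumerates four production steps; the first two (query CloudWatch for EC2 CPU metrics, identify instances below the threshold) can be sketched directly against the public APIs. This is a hedged, standalone illustration, not the package's implementation: `find_idle_instances` is a hypothetical helper name.

```python
from datetime import datetime, timedelta

import boto3


def find_idle_instances(region: str, cpu_threshold: float = 5.0, duration_hours: int = 168) -> list[str]:
    """Return IDs of running instances whose CPU never exceeded the threshold."""
    ec2 = boto3.client("ec2", region_name=region)
    cloudwatch = boto3.client("cloudwatch", region_name=region)
    end_time = datetime.utcnow()
    start_time = end_time - timedelta(hours=duration_hours)

    idle = []
    paginator = ec2.get_paginator("describe_instances")
    for page in paginator.paginate(Filters=[{"Name": "instance-state-name", "Values": ["running"]}]):
        for reservation in page["Reservations"]:
            for instance in reservation["Instances"]:
                stats = cloudwatch.get_metric_statistics(
                    Namespace="AWS/EC2",
                    MetricName="CPUUtilization",
                    Dimensions=[{"Name": "InstanceId", "Value": instance["InstanceId"]}],
                    StartTime=start_time,
                    EndTime=end_time,
                    Period=3600,  # hourly datapoints, 168 points for 7 days
                    Statistics=["Average"],
                )
                datapoints = stats.get("Datapoints", [])
                # Skip instances with no metrics rather than flagging them idle.
                if datapoints and max(dp["Average"] for dp in datapoints) < cpu_threshold:
                    idle.append(instance["InstanceId"])
    return idle
```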
@@ -1193,8 +1199,8 @@ class CostOptimizer(CloudOpsBase):
             business_metrics=self.create_business_metrics(),
             recommendations=[
                 "Implement auto-scaling policies for variable workloads",
-                "Consider spot instances for fault-tolerant workloads",
-                "Review instance sizing for optimization opportunities"
+                "Consider spot instances for fault-tolerant workloads",
+                "Review instance sizing for optimization opportunities",
             ],
             aws_profile_used=self.profile,
             regions_analyzed=regions or [],
@@ -1204,14 +1210,11 @@ class CostOptimizer(CloudOpsBase):
             savings_percentage=0.0,
             idle_resources=[],
             oversized_resources=[],
-            unattached_resources=[]
+            unattached_resources=[],
         )
-
+
     async def optimize_workspaces(
-        self,
-        usage_threshold_days: int = 180,
-        analysis_days: int = 30,
-        dry_run: bool = True
+        self, usage_threshold_days: int = 180, analysis_days: int = 30, dry_run: bool = True
     ) -> CostOptimizationResult:
         """
         Business Scenario: Cleanup unused WorkSpaces with zero usage in last 6 months
@@ -1228,7 +1231,7 @@ class CostOptimizer(CloudOpsBase):
         """
         operation_name = "WorkSpaces Cost Optimization"
         print_header(f"🏢 {operation_name}")
-
+
         # Import existing workspaces analyzer
         try:
             from runbooks.finops.workspaces_analyzer import WorkSpacesCostAnalyzer, analyze_workspaces
@@ -1246,25 +1249,22 @@ class CostOptimizer(CloudOpsBase):
                 execution_time=0.0,
                 resources_analyzed=0,
                 resources_impacted=[],
-                business_metrics={
-                    "total_monthly_savings": 0.0,
-                    "overall_risk_level": "low"
-                },
+                business_metrics={"total_monthly_savings": 0.0, "overall_risk_level": "low"},
                 recommendations=[],
                 aws_profile_used=self.profile or "default",
                 current_monthly_spend=0.0,
                 optimized_monthly_spend=0.0,
                 savings_percentage=0.0,
-                annual_savings=0.0
+                annual_savings=0.0,
             )
-
+
         # Execute WorkSpaces analysis using proven finops function
         analysis_results = analyze_workspaces(
             profile=self.profile,
             unused_days=usage_threshold_days,
             analysis_days=analysis_days,
             output_format="json",
-            dry_run=dry_run
+            dry_run=dry_run,
         )
 
         # Extract analysis results
@@ -1280,7 +1280,7 @@ class CostOptimizer(CloudOpsBase):
         estimated_annual_savings = 0.0
         unused_workspaces_count = 0
         total_workspaces = 0
-
+
         # Calculate savings percentage if we have baseline cost data
         savings_percentage = 0.0
         if summary.get("total_monthly_cost", 0) > 0:
@@ -1314,7 +1314,7 @@ class CostOptimizer(CloudOpsBase):
                     projected_savings=estimated_monthly_savings,
                     risk_level=RiskLevel.LOW,
                     business_criticality="low",
-                    modification_required=not dry_run
+                    modification_required=not dry_run,
                 )
             ],
             # Business metrics for executive reporting
@@ -1322,117 +1322,129 @@ class CostOptimizer(CloudOpsBase):
                 "total_monthly_savings": estimated_monthly_savings,
                 "overall_risk_level": "low",
                 "unused_workspaces_count": unused_workspaces_count,
-                "total_workspaces_analyzed": total_workspaces
+                "total_workspaces_analyzed": total_workspaces,
             },
             recommendations=[
                 f"Terminate {unused_workspaces_count} unused WorkSpaces to save ${estimated_monthly_savings:.2f}/month",
                 f"Estimated annual savings: ${estimated_annual_savings:.2f}",
                 "Verify WorkSpaces are truly unused before termination",
-                "Consider implementing usage monitoring for remaining WorkSpaces"
+                "Consider implementing usage monitoring for remaining WorkSpaces",
             ],
-            aws_profile_used=self.profile or "default"
+            aws_profile_used=self.profile or "default",
         )
-
+
     async def optimize_rds_snapshots(
-        self,
-        snapshot_age_threshold_days: int = 90,
-        dry_run: bool = True
+        self, snapshot_age_threshold_days: int = 90, dry_run: bool = True
     ) -> CostOptimizationResult:
         """
         Business Scenario: Delete RDS manual snapshots
-        JIRA Reference: FinOps-23
+        JIRA Reference: FinOps-23
         Expected Savings: USD $5,000 – significant annual savings
-
+
         Args:
             snapshot_age_threshold_days: Age threshold for snapshot deletion
             dry_run: If True, only analyze without deletion
-
+
         Returns:
             CostOptimizationResult with RDS snapshots cleanup analysis
         """
         operation_name = "RDS Snapshots Cost Optimization"
         print_header(f"💾 {operation_name} (FinOps-23)")
-
+
         with create_progress_bar() as progress:
             task = progress.add_task("Analyzing RDS manual snapshots...", total=100)
-
+
             # Step 1: Discover manual RDS snapshots using proven AWS Config aggregator method
             all_manual_snapshots = []
 
             try:
                 # Use AWS Config aggregator to discover all RDS snapshots across organization
-                config_client = self.session.client('config', region_name='ap-southeast-2')
+                config_client = self.session.client("config", region_name="ap-southeast-2")
 
                 # Get all RDS snapshots via AWS Config aggregator (proven method)
                 response = config_client.select_aggregate_resource_config(
                     Expression="SELECT configuration, accountId, awsRegion WHERE resourceType = 'AWS::RDS::DBSnapshot'",
-                    ConfigurationAggregatorName='organization-aggregator',
-                    MaxResults=100  # AWS limit is 100
+                    ConfigurationAggregatorName="organization-aggregator",
+                    MaxResults=100,  # AWS limit is 100
                 )
 
                 print_info(f"Found {len(response.get('Results', []))} RDS snapshots via AWS Config aggregator")
 
                 # Process snapshots found by Config aggregator
-                for result in response.get('Results', []):
+                for result in response.get("Results", []):
                     try:
                         resource_data = json.loads(result)
-                        config_data = resource_data.get('configuration', {})
+                        config_data = resource_data.get("configuration", {})
 
                         # Handle case where configuration might be a string
                         if isinstance(config_data, str):
                             config_data = json.loads(config_data)
 
                         # Filter for manual snapshots only
-                        if config_data.get('snapshotType') == 'manual':
+                        if config_data.get("snapshotType") == "manual":
                             # Create snapshot object compatible with describe_db_snapshots format
                             snapshot = {
-                                'DBSnapshotIdentifier': config_data.get('dBSnapshotIdentifier'),
-                                'SnapshotCreateTime': datetime.fromisoformat(config_data.get('snapshotCreateTime', '').replace('Z', '+00:00')) if config_data.get('snapshotCreateTime') else datetime.now(),
-                                'AllocatedStorage': config_data.get('allocatedStorage', 0),
-                                'DBInstanceIdentifier': config_data.get('dBInstanceIdentifier'),
-                                'SnapshotType': config_data.get('snapshotType'),
-                                'Status': config_data.get('status', 'available'),
-                                'Engine': config_data.get('engine'),
-                                'EngineVersion': config_data.get('engineVersion'),
+                                "DBSnapshotIdentifier": config_data.get("dBSnapshotIdentifier"),
+                                "SnapshotCreateTime": datetime.fromisoformat(
+                                    config_data.get("snapshotCreateTime", "").replace("Z", "+00:00")
+                                )
+                                if config_data.get("snapshotCreateTime")
+                                else datetime.now(),
+                                "AllocatedStorage": config_data.get("allocatedStorage", 0),
+                                "DBInstanceIdentifier": config_data.get("dBInstanceIdentifier"),
+                                "SnapshotType": config_data.get("snapshotType"),
+                                "Status": config_data.get("status", "available"),
+                                "Engine": config_data.get("engine"),
+                                "EngineVersion": config_data.get("engineVersion"),
                             }
                             all_manual_snapshots.append(snapshot)
                     except Exception as e:
                         print_warning(f"Error processing snapshot from Config: {e}")
 
-                print_success(f"Successfully processed {len(all_manual_snapshots)} manual snapshots from Config aggregator")
+                print_success(
+                    f"Successfully processed {len(all_manual_snapshots)} manual snapshots from Config aggregator"
+                )
 
             except Exception as e:
                 print_warning(f"AWS Config aggregator query failed, falling back to regional discovery: {e}")
 
                 # Fallback to regional discovery if Config aggregator fails
-                regions = ['us-east-1', 'us-west-2', 'ap-southeast-2', 'eu-west-1', 'ap-southeast-1']  # Extended regions
+                regions = [
+                    "us-east-1",
+                    "us-west-2",
+                    "ap-southeast-2",
+                    "eu-west-1",
+                    "ap-southeast-1",
+                ]  # Extended regions
 
                 for region in regions:
-                    regional_client = self.session.client('rds', region_name=region)
+                    regional_client = self.session.client("rds", region_name=region)
                     try:
                         # Get all manual snapshots in this region
-                        paginator = regional_client.get_paginator('describe_db_snapshots')
-                        page_iterator = paginator.paginate(SnapshotType='manual')
+                        paginator = regional_client.get_paginator("describe_db_snapshots")
+                        page_iterator = paginator.paginate(SnapshotType="manual")
 
                         for page in page_iterator:
-                            all_manual_snapshots.extend(page.get('DBSnapshots', []))
+                            all_manual_snapshots.extend(page.get("DBSnapshots", []))
 
-                        print_info(f"Found {len([s for s in all_manual_snapshots if 'region' not in s])} manual snapshots in {region}")
+                        print_info(
+                            f"Found {len([s for s in all_manual_snapshots if 'region' not in s])} manual snapshots in {region}"
+                        )
                     except Exception as e:
                         print_warning(f"Could not access region {region}: {e}")
-
+
                 progress.update(task, advance=40)
-
+
             # Step 2: Filter old snapshots
             cutoff_date = datetime.now() - timedelta(days=snapshot_age_threshold_days)
             old_snapshots = []
-
+
             for snapshot in all_manual_snapshots:
-                if snapshot['SnapshotCreateTime'].replace(tzinfo=None) < cutoff_date:
+                if snapshot["SnapshotCreateTime"].replace(tzinfo=None) < cutoff_date:
                     old_snapshots.append(snapshot)
-
+
             progress.update(task, advance=70)
-
+
             # Step 3: Use enhanced RDS snapshot optimizer for consistent results
             try:
                 from runbooks.finops.rds_snapshot_optimizer import EnhancedRDSSnapshotOptimizer
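The aggregator call above is capped at `MaxResults=100` per request, so organizations with more than 100 RDS snapshots need `NextToken` paging to see the full inventory. A hedged standalone sketch follows; the aggregator name and home region are deployment-specific values mirrored from the diff, not universal defaults.

```python
import json

import boto3


def list_rds_snapshots_via_config(aggregator_name: str = "organization-aggregator") -> list[dict]:
    """Page through the Config aggregator; each call returns at most 100 rows."""
    config_client = boto3.client("config", region_name="ap-southeast-2")
    expression = (
        "SELECT configuration, accountId, awsRegion "
        "WHERE resourceType = 'AWS::RDS::DBSnapshot'"
    )

    snapshots, next_token = [], None
    while True:
        kwargs = {
            "Expression": expression,
            "ConfigurationAggregatorName": aggregator_name,
            "MaxResults": 100,  # hard AWS limit per call
        }
        if next_token:
            kwargs["NextToken"] = next_token
        response = config_client.select_aggregate_resource_config(**kwargs)
        # Each result row is a JSON document string.
        snapshots.extend(json.loads(row) for row in response.get("Results", []))
        next_token = response.get("NextToken")
        if not next_token:
            break
    return snapshots
```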
@@ -1451,23 +1463,23 @@ class CostOptimizer(CloudOpsBase):
                 )
 
                 # Use comprehensive scenario for realistic savings
-                comprehensive_scenario = optimization_results['optimization_scenarios']['comprehensive']
+                comprehensive_scenario = optimization_results["optimization_scenarios"]["comprehensive"]
 
                 # Create resource impacts for comprehensive scenario
                 resource_impacts = []
-                for snapshot in comprehensive_scenario['snapshots']:
+                for snapshot in comprehensive_scenario["snapshots"]:
                     resource_impacts.append(
                         ResourceImpact(
                             resource_type="rds-snapshot",
-                            resource_id=snapshot.get('DBSnapshotIdentifier', 'unknown'),
-                            region=snapshot.get('Region', 'unknown'),
-                            account_id=snapshot.get('AccountId', 'unknown'),
-                            estimated_monthly_cost=snapshot.get('EstimatedMonthlyCost', 0.0),
-                            projected_savings=snapshot.get('EstimatedMonthlyCost', 0.0),
+                            resource_id=snapshot.get("DBSnapshotIdentifier", "unknown"),
+                            region=snapshot.get("Region", "unknown"),
+                            account_id=snapshot.get("AccountId", "unknown"),
+                            estimated_monthly_cost=snapshot.get("EstimatedMonthlyCost", 0.0),
+                            projected_savings=snapshot.get("EstimatedMonthlyCost", 0.0),
                             risk_level=RiskLevel.MEDIUM,
                             modification_required=True,
                             resource_name=f"RDS Snapshot {snapshot.get('DBSnapshotIdentifier', 'unknown')}",
-                            estimated_downtime=0.0
+                            estimated_downtime=0.0,
                         )
                     )
 
@@ -1481,26 +1493,31 @@ class CostOptimizer(CloudOpsBase):
                         execution_time=30.0,
                         success=True,
                         error_message=None,
-                        resources_analyzed=optimization_results['total_snapshots'],
+                        resources_analyzed=optimization_results["total_snapshots"],
                         resources_impacted=resource_impacts,
                         business_metrics=self.create_business_metrics(
-                            total_savings=optimization_results['potential_monthly_savings'],
-                            overall_risk=RiskLevel.MEDIUM
+                            total_savings=optimization_results["potential_monthly_savings"],
+                            overall_risk=RiskLevel.MEDIUM,
                         ),
                         recommendations=[
                             f"Review {optimization_results['cleanup_candidates']} snapshots older than {snapshot_age_threshold_days} days",
                             f"Potential annual savings: ${optimization_results['potential_annual_savings']:,.2f}",
                             "Consider implementing automated retention policies",
-                            "Review backup requirements before deletion"
+                            "Review backup requirements before deletion",
                         ],
                         # CostOptimizationResult specific fields
-                        current_monthly_spend=optimization_results.get('current_monthly_spend', 0.0),
-                        optimized_monthly_spend=optimization_results.get('current_monthly_spend', 0.0) - optimization_results['potential_monthly_savings'],
-                        savings_percentage=(optimization_results['potential_monthly_savings'] / max(optimization_results.get('current_monthly_spend', 1), 1)) * 100,
-                        annual_savings=optimization_results['potential_annual_savings'],
-                        total_monthly_savings=optimization_results['potential_monthly_savings'],
-                        affected_resources=optimization_results['cleanup_candidates'],
-                        resource_impacts=resource_impacts,
+                        current_monthly_spend=optimization_results.get("current_monthly_spend", 0.0),
+                        optimized_monthly_spend=optimization_results.get("current_monthly_spend", 0.0)
+                        - optimization_results["potential_monthly_savings"],
+                        savings_percentage=(
+                            optimization_results["potential_monthly_savings"]
+                            / max(optimization_results.get("current_monthly_spend", 1), 1)
+                        )
+                        * 100,
+                        annual_savings=optimization_results["potential_annual_savings"],
+                        total_monthly_savings=optimization_results["potential_monthly_savings"],
+                        affected_resources=optimization_results["cleanup_candidates"],
+                        resource_impacts=resource_impacts,
                     )
                 else:
                     print_warning("No snapshots discovered via enhanced optimizer")
@@ -1514,15 +1531,15 @@ class CostOptimizer(CloudOpsBase):
             print_info("Using legacy optimization calculation...")
             # Step 3: Calculate estimated savings (legacy)
             # Based on JIRA data: measurable range for manual snapshots
-            total_size_gb = sum(snapshot.get('AllocatedStorage', 0) for snapshot in old_snapshots)
+            total_size_gb = sum(snapshot.get("AllocatedStorage", 0) for snapshot in old_snapshots)
             estimated_monthly_savings = total_size_gb * 0.05  # ~$0.05/GB-month for snapshots
             progress.update(task, advance=90)
-
+
             # Step 4: Execute cleanup if not dry_run
             if not dry_run and old_snapshots:
                 await self._execute_rds_snapshots_cleanup(old_snapshots)
             progress.update(task, advance=100)
-
+
             # Display results
             results_table = create_table("RDS Snapshots Optimization Results")
             results_table.add_row("Manual Snapshots Found", str(len(all_manual_snapshots)))
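The legacy path prices snapshots as allocated gigabytes times a flat ~$0.05/GB-month, so the estimate is linear in storage. A worked example with assumed figures (the fleet size is hypothetical; the rate is the rough figure from the diff, not a quoted AWS price):

```python
# Assumed fleet: 40 manual snapshots averaging 500 GB each.
SNAPSHOT_RATE_PER_GB_MONTH = 0.05  # ~$0.05/GB-month, as in the diff

total_size_gb = 40 * 500  # 20,000 GB allocated
monthly = total_size_gb * SNAPSHOT_RATE_PER_GB_MONTH
print(f"${monthly:,.2f}/month, ${monthly * 12:,.2f}/year")  # $1,000.00/month, $12,000.00/year
```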
@@ -1532,7 +1549,7 @@ class CostOptimizer(CloudOpsBase):
             results_table.add_row("Annual Savings", format_cost(estimated_monthly_savings * 12))
             results_table.add_row("Execution Mode", "Analysis Only" if dry_run else "Cleanup Executed")
             console.print(results_table)
-
+
             return CostOptimizationResult(
                 scenario=BusinessScenario.COST_OPTIMIZATION,
                 scenario_name=operation_name,
@@ -1553,68 +1570,61 @@ class CostOptimizer(CloudOpsBase):
                         account_id=self.account_id,
                         estimated_monthly_cost=estimated_monthly_savings,
                         projected_savings=estimated_monthly_savings,
-                        risk_level=RiskLevel.MEDIUM
+                        risk_level=RiskLevel.MEDIUM,
                     )
                 ],
                 # Add missing required fields
                 resources_analyzed=len(all_manual_snapshots),
                 resources_impacted=[],  # Must be a list
-                business_metrics={
-                    "total_monthly_savings": estimated_monthly_savings,
-                    "overall_risk_level": "medium"
-                },
+                business_metrics={"total_monthly_savings": estimated_monthly_savings, "overall_risk_level": "medium"},
                 recommendations=[],
                 aws_profile_used=self.profile or "default",
                 current_monthly_spend=0.0,
-                optimized_monthly_spend=0.0
+                optimized_monthly_spend=0.0,
             )
-
+
     async def investigate_commvault_ec2(
-        self,
-        account_id: Optional[str] = None,
-        dry_run: bool = True
+        self, account_id: Optional[str] = None, dry_run: bool = True
    ) -> CostOptimizationResult:
         """
         Business Scenario: Investigate Commvault Account and EC2 instances
         JIRA Reference: FinOps-25
         Expected Savings: TBD via utilization analysis
-
+
         Args:
             account_id: Commvault backups account ID
             dry_run: If True, only analyze without action
-
+
         Returns:
             CostOptimizationResult with Commvault EC2 investigation analysis
         """
         operation_name = "Commvault EC2 Investigation"
         print_header(f"🔍 {operation_name} (FinOps-25)")
-
+
         print_info(f"Analyzing Commvault account: {account_id}")
         print_warning("This investigation determines if EC2 instances are actively used for backups")
-
+
         with create_progress_bar() as progress:
             task = progress.add_task("Investigating Commvault EC2 instances...", total=100)
-
+
             # Step 1: Discover EC2 instances in Commvault account
             # Note: This would require cross-account access or account switching
             try:
-                ec2_client = self.session.client('ec2', region_name=self.region)
+                ec2_client = self.session.client("ec2", region_name=self.region)
                 response = ec2_client.describe_instances(
-                    Filters=[
-                        {'Name': 'instance-state-name', 'Values': ['running', 'stopped']}
-                    ]
+                    Filters=[{"Name": "instance-state-name", "Values": ["running", "stopped"]}]
                 )
-
+
                 commvault_instances = []
-                for reservation in response['Reservations']:
-                    commvault_instances.extend(reservation['Instances'])
-
+                for reservation in response["Reservations"]:
+                    commvault_instances.extend(reservation["Instances"])
+
                 progress.update(task, advance=40)
-
+
             except Exception as e:
                 print_error(f"Cannot access Commvault account {account_id}: {e}")
                 print_info("Investigation requires appropriate cross-account IAM permissions")
-
+
                 return CostOptimizationResult(
                     scenario=BusinessScenario.COST_OPTIMIZATION,
                     scenario_name=operation_name,
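The except branch above points at cross-account IAM as the usual blocker. One conventional pattern is assuming a read-only role in the Commvault account via STS and building a boto3 session from the temporary credentials. The role name below is a placeholder, not something defined by the package.

```python
import boto3


def commvault_session(account_id: str, role_name: str = "ReadOnlyInvestigation") -> boto3.Session:
    """Assume a role in the Commvault account; the role name here is hypothetical."""
    sts = boto3.client("sts")
    creds = sts.assume_role(
        RoleArn=f"arn:aws:iam::{account_id}:role/{role_name}",
        RoleSessionName="commvault-ec2-investigation",
        DurationSeconds=3600,
    )["Credentials"]
    return boto3.Session(
        aws_access_key_id=creds["AccessKeyId"],
        aws_secret_access_key=creds["SecretAccessKey"],
        aws_session_token=creds["SessionToken"],
    )


# Usage: ec2 = commvault_session("111122223333").client("ec2", region_name="ap-southeast-2")
```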
@@ -1626,39 +1636,36 @@ class CostOptimizer(CloudOpsBase):
                     execution_time=0.0,
                     resources_analyzed=0,
                     resources_impacted=[],  # Must be a list
-                    business_metrics={
-                        "total_monthly_savings": 0.0,
-                        "overall_risk_level": "high"
-                    },
+                    business_metrics={"total_monthly_savings": 0.0, "overall_risk_level": "high"},
                     recommendations=[],
                     aws_profile_used=self.profile or "default",
                     current_monthly_spend=0.0,
                     optimized_monthly_spend=0.0,
-                    savings_percentage=0.0
+                    savings_percentage=0.0,
                 )
-
+
             # Step 2: Analyze instance utilization patterns
             active_instances = []
             idle_instances = []
-
+
             for instance in commvault_instances:
                 # This is a simplified analysis - real implementation would check:
                 # - CloudWatch metrics for CPU/Network/Disk utilization
                 # - Backup job logs
                 # - Instance tags for backup software identification
-                if instance['State']['Name'] == 'running':
+                if instance["State"]["Name"] == "running":
                     active_instances.append(instance)
                 else:
                     idle_instances.append(instance)
-
+
             progress.update(task, advance=80)
-
+
             # Step 3: Generate investigation report
             estimated_monthly_cost = len(active_instances) * 50  # Rough estimate
             potential_savings = len(idle_instances) * 50
-
+
             progress.update(task, advance=100)
-
+
             # Display investigation results
             results_table = create_table("Commvault EC2 Investigation Results")
             results_table.add_row("Total EC2 Instances", str(len(commvault_instances)))
@@ -1668,7 +1675,7 @@ class CostOptimizer(CloudOpsBase):
             results_table.add_row("Potential Savings (if idle)", format_cost(potential_savings))
             results_table.add_row("Investigation Status", "Framework Established")
             console.print(results_table)
-
+
             # Investigation-specific recommendations
             recommendations_panel = create_panel(
                 "📋 Investigation Recommendations:\n"
@@ -1677,10 +1684,10 @@ class CostOptimizer(CloudOpsBase):
                 "3. Analyze CloudWatch metrics for actual utilization\n"
                 "4. Coordinate with backup team before any terminations\n"
                 "5. Implement monitoring for backup service health",
-                title="Next Steps"
+                title="Next Steps",
             )
             console.print(recommendations_panel)
-
+
             return CostOptimizationResult(
                 scenario=BusinessScenario.COST_OPTIMIZATION,
                 scenario_name=operation_name,
@@ -1698,26 +1705,23 @@ class CostOptimizer(CloudOpsBase):
                         resource_type="AWS::EC2::Instance",
                         action="investigate",
                         monthly_savings=potential_savings,
-                        risk_level=RiskLevel.HIGH  # High risk due to potential backup disruption
+                        risk_level=RiskLevel.HIGH,  # High risk due to potential backup disruption
                     )
                 ],
                 # Add missing required fields
                 resources_analyzed=len(commvault_instances),
                 resources_impacted=[],  # Must be a list
-                business_metrics={
-                    "total_monthly_savings": potential_savings,
-                    "overall_risk_level": "high"
-                },
+                business_metrics={"total_monthly_savings": potential_savings, "overall_risk_level": "high"},
                 recommendations=[],
                 aws_profile_used=self.profile or "default",
                 current_monthly_spend=0.0,
-                optimized_monthly_spend=0.0
+                optimized_monthly_spend=0.0,
             )
-
+
     async def _execute_workspaces_cleanup(self, unused_workspaces: List[dict]) -> None:
         """Execute WorkSpaces cleanup with safety controls."""
         print_warning(f"Executing WorkSpaces cleanup for {len(unused_workspaces)} instances")
-
+
         for workspace in unused_workspaces:
             try:
                 # This would require WorkSpaces client and proper error handling
@@ -1726,15 +1730,15 @@ class CostOptimizer(CloudOpsBase):
                 await asyncio.sleep(0.1)  # Prevent rate limiting
             except Exception as e:
                 print_error(f"Failed to terminate WorkSpace: {e}")
-
+
     async def _execute_rds_snapshots_cleanup(self, old_snapshots: List[dict]) -> None:
         """Execute RDS snapshots cleanup with safety controls."""
         print_warning(f"Executing RDS snapshots cleanup for {len(old_snapshots)} snapshots")
-
+
         for snapshot in old_snapshots:
             try:
                 # This would require RDS client calls with proper error handling
-                snapshot_id = snapshot.get('DBSnapshotIdentifier', 'unknown')
+                snapshot_id = snapshot.get("DBSnapshotIdentifier", "unknown")
                 print_info(f"Would delete RDS snapshot: {snapshot_id}")
                 # rds_client.delete_db_snapshot(DBSnapshotIdentifier=snapshot_id)
                 await asyncio.sleep(0.2)  # Prevent rate limiting
@@ -1742,42 +1746,40 @@ class CostOptimizer(CloudOpsBase):
                 print_error(f"Failed to delete snapshot: {e}")
 
     async def emergency_cost_response(
-        self,
-        cost_spike_threshold: float = 5000.0,
-        analysis_days: int = 7
+        self, cost_spike_threshold: float = 5000.0, analysis_days: int = 7
     ) -> CostOptimizationResult:
         """
         Business Scenario: Emergency response to cost spikes
-
+
         Designed for: CFO escalations, budget overruns, unexpected charges
         Response time: <30 minutes for initial analysis
-
+
         Args:
             cost_spike_threshold: Minimum cost increase to trigger analysis
             analysis_days: Days to analyze for cost changes
-
+
         Returns:
             CostOptimizationResult with emergency cost analysis
         """
         operation_name = "Emergency Cost Spike Response"
         print_header(f"🚨 {operation_name}")
-
+
         print_warning(f"Analyzing cost increases >${format_cost(cost_spike_threshold)}")
-
+
         # This would integrate multiple cost optimization scenarios
         # for rapid cost reduction in emergency situations
-
+
         emergency_actions = [
             "Immediate idle resource identification and shutdown",
             "Temporary scaling reduction for non-critical services",
             "Cost anomaly detection and root cause analysis",
-            "Executive cost impact report generation"
+            "Executive cost impact report generation",
         ]
-
+
         print_info("Emergency response actions:")
         for action in emergency_actions:
             print_info(f"  • {action}")
-
+
         return CostOptimizationResult(
             scenario=BusinessScenario.COST_OPTIMIZATION,
             scenario_name="Emergency Cost Spike Response",
@@ -1790,12 +1792,12 @@ class CostOptimizer(CloudOpsBase):
             resources_impacted=[],
             business_metrics=self.create_business_metrics(
                 total_savings=cost_spike_threshold * 0.3,  # Target 30% reduction
-                overall_risk=RiskLevel.HIGH  # Emergency actions carry higher risk
+                overall_risk=RiskLevel.HIGH,  # Emergency actions carry higher risk
             ),
             recommendations=[
                 "Implement cost anomaly detection and alerting",
                 "Establish cost governance policies and approval workflows",
-                "Regular cost optimization reviews to prevent spikes"
+                "Regular cost optimization reviews to prevent spikes",
             ],
             aws_profile_used=self.profile,
             regions_analyzed=[],
@@ -1804,6 +1806,6 @@ class CostOptimizer(CloudOpsBase):
             optimized_monthly_spend=cost_spike_threshold * 0.7,
             savings_percentage=30.0,
             idle_resources=[],
-            oversized_resources=[],
-            unattached_resources=[]
-        )
+            oversized_resources=[],
+            unattached_resources=[],
+        )
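The emergency scenario lists cost anomaly detection among its actions but does not implement it here. A minimal first pass can compare daily service-level spend over the analysis window using Cost Explorer's GetCostAndUsage; the sketch below is illustrative and independent of the package, and the doubling heuristic at the end is an assumption, not a package rule.

```python
from datetime import date, timedelta

import boto3


def daily_spend_by_service(analysis_days: int = 7) -> dict[str, list[float]]:
    """Pull daily unblended cost per service for the trailing window."""
    ce = boto3.client("ce", region_name="us-east-1")  # Cost Explorer is served from us-east-1
    end = date.today()
    start = end - timedelta(days=analysis_days)

    response = ce.get_cost_and_usage(
        TimePeriod={"Start": start.isoformat(), "End": end.isoformat()},
        Granularity="DAILY",
        Metrics=["UnblendedCost"],
        GroupBy=[{"Type": "DIMENSION", "Key": "SERVICE"}],
    )

    spend: dict[str, list[float]] = {}
    for day in response["ResultsByTime"]:
        for group in day["Groups"]:
            service = group["Keys"][0]
            amount = float(group["Metrics"]["UnblendedCost"]["Amount"])
            spend.setdefault(service, []).append(amount)
    return spend


# Flag services whose latest day doubled their window average (heuristic):
# spikes = {s: v for s, v in daily_spend_by_service().items()
#           if len(v) > 1 and v[-1] > 2 * (sum(v[:-1]) / len(v[:-1]))}
```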