runbooks 1.1.4__py3-none-any.whl → 1.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- runbooks/__init__.py +31 -2
- runbooks/__init___optimized.py +18 -4
- runbooks/_platform/__init__.py +1 -5
- runbooks/_platform/core/runbooks_wrapper.py +141 -138
- runbooks/aws2/accuracy_validator.py +812 -0
- runbooks/base.py +7 -0
- runbooks/cfat/assessment/compliance.py +1 -1
- runbooks/cfat/assessment/runner.py +1 -0
- runbooks/cfat/cloud_foundations_assessment.py +227 -239
- runbooks/cli/__init__.py +1 -1
- runbooks/cli/commands/cfat.py +64 -23
- runbooks/cli/commands/finops.py +1005 -54
- runbooks/cli/commands/inventory.py +135 -91
- runbooks/cli/commands/operate.py +9 -36
- runbooks/cli/commands/security.py +42 -18
- runbooks/cli/commands/validation.py +432 -18
- runbooks/cli/commands/vpc.py +81 -17
- runbooks/cli/registry.py +22 -10
- runbooks/cloudops/__init__.py +20 -27
- runbooks/cloudops/base.py +96 -107
- runbooks/cloudops/cost_optimizer.py +544 -542
- runbooks/cloudops/infrastructure_optimizer.py +5 -4
- runbooks/cloudops/interfaces.py +224 -225
- runbooks/cloudops/lifecycle_manager.py +5 -4
- runbooks/cloudops/mcp_cost_validation.py +252 -235
- runbooks/cloudops/models.py +78 -53
- runbooks/cloudops/monitoring_automation.py +5 -4
- runbooks/cloudops/notebook_framework.py +177 -213
- runbooks/cloudops/security_enforcer.py +125 -159
- runbooks/common/accuracy_validator.py +17 -12
- runbooks/common/aws_pricing.py +349 -326
- runbooks/common/aws_pricing_api.py +211 -212
- runbooks/common/aws_profile_manager.py +40 -36
- runbooks/common/aws_utils.py +74 -79
- runbooks/common/business_logic.py +126 -104
- runbooks/common/cli_decorators.py +36 -60
- runbooks/common/comprehensive_cost_explorer_integration.py +455 -463
- runbooks/common/cross_account_manager.py +197 -204
- runbooks/common/date_utils.py +27 -39
- runbooks/common/decorators.py +29 -19
- runbooks/common/dry_run_examples.py +173 -208
- runbooks/common/dry_run_framework.py +157 -155
- runbooks/common/enhanced_exception_handler.py +15 -4
- runbooks/common/enhanced_logging_example.py +50 -64
- runbooks/common/enhanced_logging_integration_example.py +65 -37
- runbooks/common/env_utils.py +16 -16
- runbooks/common/error_handling.py +40 -38
- runbooks/common/lazy_loader.py +41 -23
- runbooks/common/logging_integration_helper.py +79 -86
- runbooks/common/mcp_cost_explorer_integration.py +476 -493
- runbooks/common/mcp_integration.py +99 -79
- runbooks/common/memory_optimization.py +140 -118
- runbooks/common/module_cli_base.py +37 -58
- runbooks/common/organizations_client.py +175 -193
- runbooks/common/patterns.py +23 -25
- runbooks/common/performance_monitoring.py +67 -71
- runbooks/common/performance_optimization_engine.py +283 -274
- runbooks/common/profile_utils.py +111 -37
- runbooks/common/rich_utils.py +315 -141
- runbooks/common/sre_performance_suite.py +177 -186
- runbooks/enterprise/__init__.py +1 -1
- runbooks/enterprise/logging.py +144 -106
- runbooks/enterprise/security.py +187 -204
- runbooks/enterprise/validation.py +43 -56
- runbooks/finops/__init__.py +26 -30
- runbooks/finops/account_resolver.py +1 -1
- runbooks/finops/advanced_optimization_engine.py +980 -0
- runbooks/finops/automation_core.py +268 -231
- runbooks/finops/business_case_config.py +184 -179
- runbooks/finops/cli.py +660 -139
- runbooks/finops/commvault_ec2_analysis.py +157 -164
- runbooks/finops/compute_cost_optimizer.py +336 -320
- runbooks/finops/config.py +20 -20
- runbooks/finops/cost_optimizer.py +484 -618
- runbooks/finops/cost_processor.py +332 -214
- runbooks/finops/dashboard_runner.py +1006 -172
- runbooks/finops/ebs_cost_optimizer.py +991 -657
- runbooks/finops/elastic_ip_optimizer.py +317 -257
- runbooks/finops/enhanced_mcp_integration.py +340 -0
- runbooks/finops/enhanced_progress.py +32 -29
- runbooks/finops/enhanced_trend_visualization.py +3 -2
- runbooks/finops/enterprise_wrappers.py +223 -285
- runbooks/finops/executive_export.py +203 -160
- runbooks/finops/helpers.py +130 -288
- runbooks/finops/iam_guidance.py +1 -1
- runbooks/finops/infrastructure/__init__.py +80 -0
- runbooks/finops/infrastructure/commands.py +506 -0
- runbooks/finops/infrastructure/load_balancer_optimizer.py +866 -0
- runbooks/finops/infrastructure/vpc_endpoint_optimizer.py +832 -0
- runbooks/finops/markdown_exporter.py +337 -174
- runbooks/finops/mcp_validator.py +1952 -0
- runbooks/finops/nat_gateway_optimizer.py +1512 -481
- runbooks/finops/network_cost_optimizer.py +657 -587
- runbooks/finops/notebook_utils.py +226 -188
- runbooks/finops/optimization_engine.py +1136 -0
- runbooks/finops/optimizer.py +19 -23
- runbooks/finops/rds_snapshot_optimizer.py +367 -411
- runbooks/finops/reservation_optimizer.py +427 -363
- runbooks/finops/scenario_cli_integration.py +64 -65
- runbooks/finops/scenarios.py +1277 -438
- runbooks/finops/schemas.py +218 -182
- runbooks/finops/snapshot_manager.py +2289 -0
- runbooks/finops/types.py +3 -3
- runbooks/finops/validation_framework.py +259 -265
- runbooks/finops/vpc_cleanup_exporter.py +189 -144
- runbooks/finops/vpc_cleanup_optimizer.py +591 -573
- runbooks/finops/workspaces_analyzer.py +171 -182
- runbooks/integration/__init__.py +89 -0
- runbooks/integration/mcp_integration.py +1920 -0
- runbooks/inventory/CLAUDE.md +816 -0
- runbooks/inventory/__init__.py +2 -2
- runbooks/inventory/aws_decorators.py +2 -3
- runbooks/inventory/check_cloudtrail_compliance.py +2 -4
- runbooks/inventory/check_controltower_readiness.py +152 -151
- runbooks/inventory/check_landingzone_readiness.py +85 -84
- runbooks/inventory/cloud_foundations_integration.py +144 -149
- runbooks/inventory/collectors/aws_comprehensive.py +1 -1
- runbooks/inventory/collectors/aws_networking.py +109 -99
- runbooks/inventory/collectors/base.py +4 -0
- runbooks/inventory/core/collector.py +495 -313
- runbooks/inventory/core/formatter.py +11 -0
- runbooks/inventory/draw_org_structure.py +8 -9
- runbooks/inventory/drift_detection_cli.py +69 -96
- runbooks/inventory/ec2_vpc_utils.py +2 -2
- runbooks/inventory/find_cfn_drift_detection.py +5 -7
- runbooks/inventory/find_cfn_orphaned_stacks.py +7 -9
- runbooks/inventory/find_cfn_stackset_drift.py +5 -6
- runbooks/inventory/find_ec2_security_groups.py +48 -42
- runbooks/inventory/find_landingzone_versions.py +4 -6
- runbooks/inventory/find_vpc_flow_logs.py +7 -9
- runbooks/inventory/inventory_mcp_cli.py +48 -46
- runbooks/inventory/inventory_modules.py +103 -91
- runbooks/inventory/list_cfn_stacks.py +9 -10
- runbooks/inventory/list_cfn_stackset_operation_results.py +1 -3
- runbooks/inventory/list_cfn_stackset_operations.py +79 -57
- runbooks/inventory/list_cfn_stacksets.py +8 -10
- runbooks/inventory/list_config_recorders_delivery_channels.py +49 -39
- runbooks/inventory/list_ds_directories.py +65 -53
- runbooks/inventory/list_ec2_availability_zones.py +2 -4
- runbooks/inventory/list_ec2_ebs_volumes.py +32 -35
- runbooks/inventory/list_ec2_instances.py +23 -28
- runbooks/inventory/list_ecs_clusters_and_tasks.py +26 -34
- runbooks/inventory/list_elbs_load_balancers.py +22 -20
- runbooks/inventory/list_enis_network_interfaces.py +26 -33
- runbooks/inventory/list_guardduty_detectors.py +2 -4
- runbooks/inventory/list_iam_policies.py +2 -4
- runbooks/inventory/list_iam_roles.py +5 -7
- runbooks/inventory/list_iam_saml_providers.py +4 -6
- runbooks/inventory/list_lambda_functions.py +38 -38
- runbooks/inventory/list_org_accounts.py +6 -8
- runbooks/inventory/list_org_accounts_users.py +55 -44
- runbooks/inventory/list_rds_db_instances.py +31 -33
- runbooks/inventory/list_rds_snapshots_aggregator.py +192 -208
- runbooks/inventory/list_route53_hosted_zones.py +3 -5
- runbooks/inventory/list_servicecatalog_provisioned_products.py +37 -41
- runbooks/inventory/list_sns_topics.py +2 -4
- runbooks/inventory/list_ssm_parameters.py +4 -7
- runbooks/inventory/list_vpc_subnets.py +2 -4
- runbooks/inventory/list_vpcs.py +7 -10
- runbooks/inventory/mcp_inventory_validator.py +554 -468
- runbooks/inventory/mcp_vpc_validator.py +359 -442
- runbooks/inventory/organizations_discovery.py +63 -55
- runbooks/inventory/recover_cfn_stack_ids.py +7 -8
- runbooks/inventory/requirements.txt +0 -1
- runbooks/inventory/rich_inventory_display.py +35 -34
- runbooks/inventory/run_on_multi_accounts.py +3 -5
- runbooks/inventory/unified_validation_engine.py +281 -253
- runbooks/inventory/verify_ec2_security_groups.py +1 -1
- runbooks/inventory/vpc_analyzer.py +735 -697
- runbooks/inventory/vpc_architecture_validator.py +293 -348
- runbooks/inventory/vpc_dependency_analyzer.py +384 -380
- runbooks/inventory/vpc_flow_analyzer.py +1 -1
- runbooks/main.py +49 -34
- runbooks/main_final.py +91 -60
- runbooks/main_minimal.py +22 -10
- runbooks/main_optimized.py +131 -100
- runbooks/main_ultra_minimal.py +7 -2
- runbooks/mcp/__init__.py +36 -0
- runbooks/mcp/integration.py +679 -0
- runbooks/monitoring/performance_monitor.py +9 -4
- runbooks/operate/dynamodb_operations.py +3 -1
- runbooks/operate/ec2_operations.py +145 -137
- runbooks/operate/iam_operations.py +146 -152
- runbooks/operate/networking_cost_heatmap.py +29 -8
- runbooks/operate/rds_operations.py +223 -254
- runbooks/operate/s3_operations.py +107 -118
- runbooks/operate/vpc_operations.py +646 -616
- runbooks/remediation/base.py +1 -1
- runbooks/remediation/commons.py +10 -7
- runbooks/remediation/commvault_ec2_analysis.py +70 -66
- runbooks/remediation/ec2_unattached_ebs_volumes.py +1 -0
- runbooks/remediation/multi_account.py +24 -21
- runbooks/remediation/rds_snapshot_list.py +86 -60
- runbooks/remediation/remediation_cli.py +92 -146
- runbooks/remediation/universal_account_discovery.py +83 -79
- runbooks/remediation/workspaces_list.py +46 -41
- runbooks/security/__init__.py +19 -0
- runbooks/security/assessment_runner.py +1150 -0
- runbooks/security/baseline_checker.py +812 -0
- runbooks/security/cloudops_automation_security_validator.py +509 -535
- runbooks/security/compliance_automation_engine.py +17 -17
- runbooks/security/config/__init__.py +2 -2
- runbooks/security/config/compliance_config.py +50 -50
- runbooks/security/config_template_generator.py +63 -76
- runbooks/security/enterprise_security_framework.py +1 -1
- runbooks/security/executive_security_dashboard.py +519 -508
- runbooks/security/multi_account_security_controls.py +959 -1210
- runbooks/security/real_time_security_monitor.py +422 -444
- runbooks/security/security_baseline_tester.py +1 -1
- runbooks/security/security_cli.py +143 -112
- runbooks/security/test_2way_validation.py +439 -0
- runbooks/security/two_way_validation_framework.py +852 -0
- runbooks/sre/production_monitoring_framework.py +167 -177
- runbooks/tdd/__init__.py +15 -0
- runbooks/tdd/cli.py +1071 -0
- runbooks/utils/__init__.py +14 -17
- runbooks/utils/logger.py +7 -2
- runbooks/utils/version_validator.py +50 -47
- runbooks/validation/__init__.py +6 -6
- runbooks/validation/cli.py +9 -3
- runbooks/validation/comprehensive_2way_validator.py +745 -704
- runbooks/validation/mcp_validator.py +906 -228
- runbooks/validation/terraform_citations_validator.py +104 -115
- runbooks/validation/terraform_drift_detector.py +461 -454
- runbooks/vpc/README.md +617 -0
- runbooks/vpc/__init__.py +8 -1
- runbooks/vpc/analyzer.py +577 -0
- runbooks/vpc/cleanup_wrapper.py +476 -413
- runbooks/vpc/cli_cloudtrail_commands.py +339 -0
- runbooks/vpc/cli_mcp_validation_commands.py +480 -0
- runbooks/vpc/cloudtrail_audit_integration.py +717 -0
- runbooks/vpc/config.py +92 -97
- runbooks/vpc/cost_engine.py +411 -148
- runbooks/vpc/cost_explorer_integration.py +553 -0
- runbooks/vpc/cross_account_session.py +101 -106
- runbooks/vpc/enhanced_mcp_validation.py +917 -0
- runbooks/vpc/eni_gate_validator.py +961 -0
- runbooks/vpc/heatmap_engine.py +185 -160
- runbooks/vpc/mcp_no_eni_validator.py +680 -639
- runbooks/vpc/nat_gateway_optimizer.py +358 -0
- runbooks/vpc/networking_wrapper.py +15 -8
- runbooks/vpc/pdca_remediation_planner.py +528 -0
- runbooks/vpc/performance_optimized_analyzer.py +219 -231
- runbooks/vpc/runbooks_adapter.py +1167 -241
- runbooks/vpc/tdd_red_phase_stubs.py +601 -0
- runbooks/vpc/test_data_loader.py +358 -0
- runbooks/vpc/tests/conftest.py +314 -4
- runbooks/vpc/tests/test_cleanup_framework.py +1022 -0
- runbooks/vpc/tests/test_cost_engine.py +0 -2
- runbooks/vpc/topology_generator.py +326 -0
- runbooks/vpc/unified_scenarios.py +1297 -1124
- runbooks/vpc/vpc_cleanup_integration.py +1943 -1115
- runbooks-1.1.6.dist-info/METADATA +327 -0
- runbooks-1.1.6.dist-info/RECORD +489 -0
- runbooks/finops/README.md +0 -414
- runbooks/finops/accuracy_cross_validator.py +0 -647
- runbooks/finops/business_cases.py +0 -950
- runbooks/finops/dashboard_router.py +0 -922
- runbooks/finops/ebs_optimizer.py +0 -973
- runbooks/finops/embedded_mcp_validator.py +0 -1629
- runbooks/finops/enhanced_dashboard_runner.py +0 -527
- runbooks/finops/finops_dashboard.py +0 -584
- runbooks/finops/finops_scenarios.py +0 -1218
- runbooks/finops/legacy_migration.py +0 -730
- runbooks/finops/multi_dashboard.py +0 -1519
- runbooks/finops/single_dashboard.py +0 -1113
- runbooks/finops/unlimited_scenarios.py +0 -393
- runbooks-1.1.4.dist-info/METADATA +0 -800
- runbooks-1.1.4.dist-info/RECORD +0 -468
- {runbooks-1.1.4.dist-info → runbooks-1.1.6.dist-info}/WHEEL +0 -0
- {runbooks-1.1.4.dist-info → runbooks-1.1.6.dist-info}/entry_points.txt +0 -0
- {runbooks-1.1.4.dist-info → runbooks-1.1.6.dist-info}/licenses/LICENSE +0 -0
- {runbooks-1.1.4.dist-info → runbooks-1.1.6.dist-info}/top_level.txt +0 -0
@@ -15,6 +15,20 @@ This module provides comprehensive EC2 compute cost optimization analysis follow
|
|
15
15
|
- Cost savings calculation with enterprise MCP validation (≥99.5% accuracy)
|
16
16
|
- Safety analysis with dependency mapping and business impact assessment
|
17
17
|
|
18
|
+
Enterprise EC2 Rightsizing Patterns (Production Validated):
|
19
|
+
- Multi-account EC2 utilization analysis with CloudWatch metrics validation
|
20
|
+
- Graviton migration opportunities for 20-40% performance + cost improvement
|
21
|
+
- CPU utilization analysis identifying underutilized instances (<10% for 90+ days)
|
22
|
+
- Memory optimization patterns for workload-appropriate instance families
|
23
|
+
- Development environment rightsizing with non-production workload patterns
|
24
|
+
|
25
|
+
Proven Optimization Scenarios:
|
26
|
+
- EC2 decommission analysis targeting $200K+ annual savings potential
|
27
|
+
- Utilization-based rightsizing with automated candidate identification
|
28
|
+
- Application owner validation workflows for business-critical workloads
|
29
|
+
- Instance family migration recommendations (x86 → Graviton, General → Compute Optimized)
|
30
|
+
- Temporal optimization patterns for development/testing workloads
|
31
|
+
|
18
32
|
Strategic Alignment:
|
19
33
|
- "Do one thing and do it well": EC2 compute optimization specialization
|
20
34
|
- "Move Fast, But Not So Fast We Crash": Safety-first analysis approach
|
@@ -25,27 +39,37 @@ Strategic Alignment:
|
|
25
39
|
import asyncio
|
26
40
|
import logging
|
27
41
|
import time
|
42
|
+
from dataclasses import dataclass
|
28
43
|
from datetime import datetime, timedelta
|
29
44
|
from typing import Any, Dict, List, Optional, Tuple
|
30
|
-
from dataclasses import dataclass
|
31
45
|
|
32
46
|
import boto3
|
33
47
|
import click
|
34
48
|
from botocore.exceptions import ClientError, NoCredentialsError
|
35
49
|
from pydantic import BaseModel, Field
|
36
50
|
|
51
|
+
from ..common.profile_utils import get_profile_for_operation
|
37
52
|
from ..common.rich_utils import (
|
38
|
-
|
39
|
-
|
53
|
+
STATUS_INDICATORS,
|
54
|
+
console,
|
55
|
+
create_panel,
|
56
|
+
create_progress_bar,
|
57
|
+
create_table,
|
58
|
+
format_cost,
|
59
|
+
print_error,
|
60
|
+
print_header,
|
61
|
+
print_info,
|
62
|
+
print_success,
|
63
|
+
print_warning,
|
40
64
|
)
|
41
|
-
from .
|
42
|
-
from ..common.profile_utils import get_profile_for_operation
|
65
|
+
from .mcp_validator import EmbeddedMCPValidator
|
43
66
|
|
44
67
|
logger = logging.getLogger(__name__)
|
45
68
|
|
46
69
|
|
47
70
|
class EC2InstanceDetails(BaseModel):
|
48
71
|
"""EC2 Instance details from EC2 API."""
|
72
|
+
|
49
73
|
instance_id: str
|
50
74
|
region: str
|
51
75
|
instance_type: str
|
@@ -68,6 +92,7 @@ class EC2InstanceDetails(BaseModel):
|
|
68
92
|
|
69
93
|
class EC2UsageMetrics(BaseModel):
|
70
94
|
"""EC2 Instance usage metrics from CloudWatch."""
|
95
|
+
|
71
96
|
instance_id: str
|
72
97
|
region: str
|
73
98
|
cpu_utilization_avg: float = 0.0
|
@@ -86,6 +111,7 @@ class EC2UsageMetrics(BaseModel):
|
|
86
111
|
|
87
112
|
class EC2OptimizationResult(BaseModel):
|
88
113
|
"""EC2 Instance optimization analysis results."""
|
114
|
+
|
89
115
|
instance_id: str
|
90
116
|
region: str
|
91
117
|
availability_zone: str
|
@@ -94,33 +120,33 @@ class EC2OptimizationResult(BaseModel):
|
|
94
120
|
launch_time: datetime
|
95
121
|
platform: Optional[str] = None
|
96
122
|
usage_metrics: Optional[EC2UsageMetrics] = None
|
97
|
-
|
123
|
+
|
98
124
|
# Cost analysis
|
99
125
|
hourly_cost: float = 0.0
|
100
126
|
monthly_cost: float = 0.0
|
101
127
|
annual_cost: float = 0.0
|
102
|
-
|
128
|
+
|
103
129
|
# Optimization strategies
|
104
130
|
is_idle: bool = False
|
105
131
|
idle_monthly_savings: float = 0.0
|
106
132
|
idle_annual_savings: float = 0.0
|
107
|
-
|
133
|
+
|
108
134
|
is_underutilized: bool = False
|
109
135
|
rightsizing_recommendation: Optional[str] = None
|
110
136
|
rightsizing_monthly_savings: float = 0.0
|
111
137
|
rightsizing_annual_savings: float = 0.0
|
112
|
-
|
138
|
+
|
113
139
|
lifecycle_optimization: Optional[str] = None # spot, reserved, scheduled
|
114
140
|
lifecycle_monthly_savings: float = 0.0
|
115
141
|
lifecycle_annual_savings: float = 0.0
|
116
|
-
|
142
|
+
|
117
143
|
# Combined optimization
|
118
144
|
optimization_recommendation: str = "retain" # retain, stop_idle, rightsize, lifecycle_optimize, terminate
|
119
145
|
risk_level: str = "low" # low, medium, high
|
120
146
|
business_impact: str = "minimal"
|
121
147
|
total_monthly_savings: float = 0.0
|
122
148
|
total_annual_savings: float = 0.0
|
123
|
-
|
149
|
+
|
124
150
|
# Safety and dependency analysis
|
125
151
|
has_tags: bool = False
|
126
152
|
has_lifetime_tag: bool = False
|
@@ -130,6 +156,7 @@ class EC2OptimizationResult(BaseModel):
|
|
130
156
|
|
131
157
|
class EC2ComputeOptimizerResults(BaseModel):
|
132
158
|
"""Complete EC2 compute optimization analysis results."""
|
159
|
+
|
133
160
|
total_instances: int = 0
|
134
161
|
running_instances: int = 0
|
135
162
|
stopped_instances: int = 0
|
@@ -137,7 +164,7 @@ class EC2ComputeOptimizerResults(BaseModel):
|
|
137
164
|
underutilized_instances: int = 0
|
138
165
|
analyzed_regions: List[str] = Field(default_factory=list)
|
139
166
|
optimization_results: List[EC2OptimizationResult] = Field(default_factory=list)
|
140
|
-
|
167
|
+
|
141
168
|
# Cost breakdown
|
142
169
|
total_monthly_cost: float = 0.0
|
143
170
|
total_annual_cost: float = 0.0
|
@@ -149,7 +176,7 @@ class EC2ComputeOptimizerResults(BaseModel):
|
|
149
176
|
lifecycle_potential_annual_savings: float = 0.0
|
150
177
|
total_potential_monthly_savings: float = 0.0
|
151
178
|
total_potential_annual_savings: float = 0.0
|
152
|
-
|
179
|
+
|
153
180
|
execution_time_seconds: float = 0.0
|
154
181
|
mcp_validation_accuracy: float = 0.0
|
155
182
|
analysis_timestamp: datetime = Field(default_factory=datetime.now)
|
@@ -158,240 +185,242 @@ class EC2ComputeOptimizerResults(BaseModel):
|
|
158
185
|
class EC2ComputeOptimizer:
|
159
186
|
"""
|
160
187
|
EC2 Compute Cost Optimization Engine - Enterprise FinOps Compute Platform
|
161
|
-
|
188
|
+
|
162
189
|
Following $132,720+ methodology with proven FinOps patterns targeting $2M-$8M annual savings:
|
163
190
|
- Multi-region discovery and analysis across enterprise accounts
|
164
191
|
- CloudWatch metrics integration for usage validation and rightsizing
|
165
|
-
- Idle detection with automated stop/terminate recommendations
|
192
|
+
- Idle detection with automated stop/terminate recommendations
|
166
193
|
- Instance lifecycle optimization (spot, reserved instances, scheduling)
|
167
194
|
- Cost calculation with MCP validation (≥99.5% accuracy)
|
168
195
|
- Evidence generation for Manager/Financial/CTO executive reporting
|
169
196
|
- Business-focused naming for executive presentation readiness
|
170
197
|
"""
|
171
|
-
|
198
|
+
|
172
199
|
def __init__(self, profile_name: Optional[str] = None, regions: Optional[List[str]] = None):
|
173
200
|
"""Initialize EC2 compute optimizer with enterprise profile support."""
|
174
201
|
self.profile_name = profile_name
|
175
|
-
self.regions = regions or [
|
176
|
-
|
202
|
+
self.regions = regions or ["us-east-1", "us-west-2", "eu-west-1"]
|
203
|
+
|
177
204
|
# Initialize AWS session with profile priority system
|
178
|
-
self.session = boto3.Session(
|
179
|
-
|
180
|
-
)
|
181
|
-
|
205
|
+
self.session = boto3.Session(profile_name=get_profile_for_operation("operational", profile_name))
|
206
|
+
|
182
207
|
# EC2 pricing (per hour, as of 2024) - approximate for common instance types
|
183
208
|
self.ec2_pricing = {
|
184
209
|
# General Purpose
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
210
|
+
"t3.micro": 0.0104,
|
211
|
+
"t3.small": 0.0208,
|
212
|
+
"t3.medium": 0.0416,
|
213
|
+
"t3.large": 0.0832,
|
214
|
+
"t3.xlarge": 0.1664,
|
215
|
+
"t3.2xlarge": 0.3328,
|
216
|
+
"m5.large": 0.096,
|
217
|
+
"m5.xlarge": 0.192,
|
218
|
+
"m5.2xlarge": 0.384,
|
219
|
+
"m5.4xlarge": 0.768,
|
196
220
|
# Compute Optimized
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
221
|
+
"c5.large": 0.085,
|
222
|
+
"c5.xlarge": 0.17,
|
223
|
+
"c5.2xlarge": 0.34,
|
224
|
+
"c5.4xlarge": 0.68,
|
202
225
|
# Memory Optimized
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
226
|
+
"r5.large": 0.126,
|
227
|
+
"r5.xlarge": 0.252,
|
228
|
+
"r5.2xlarge": 0.504,
|
229
|
+
"r5.4xlarge": 1.008,
|
207
230
|
}
|
208
|
-
|
231
|
+
|
209
232
|
# Usage thresholds for optimization recommendations
|
210
233
|
self.idle_cpu_threshold = 5.0 # CPU utilization % for idle detection
|
211
234
|
self.underutilized_cpu_threshold = 25.0 # CPU utilization % for rightsizing
|
212
235
|
self.analysis_period_days = 14 # CloudWatch analysis period
|
213
|
-
|
236
|
+
|
214
237
|
# Rightsizing recommendations mapping
|
215
238
|
self.rightsizing_map = {
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
239
|
+
"t3.medium": "t3.small",
|
240
|
+
"t3.large": "t3.medium",
|
241
|
+
"t3.xlarge": "t3.large",
|
242
|
+
"m5.xlarge": "m5.large",
|
243
|
+
"m5.2xlarge": "m5.xlarge",
|
244
|
+
"m5.4xlarge": "m5.2xlarge",
|
245
|
+
"c5.xlarge": "c5.large",
|
246
|
+
"c5.2xlarge": "c5.xlarge",
|
247
|
+
"r5.xlarge": "r5.large",
|
248
|
+
"r5.2xlarge": "r5.xlarge",
|
226
249
|
}
|
227
|
-
|
250
|
+
|
228
251
|
async def analyze_ec2_compute(self, dry_run: bool = True) -> EC2ComputeOptimizerResults:
|
229
252
|
"""
|
230
253
|
Comprehensive EC2 compute cost optimization analysis.
|
231
|
-
|
254
|
+
|
232
255
|
Args:
|
233
256
|
dry_run: Safety mode - READ-ONLY analysis only
|
234
|
-
|
257
|
+
|
235
258
|
Returns:
|
236
259
|
Complete analysis results with optimization recommendations
|
237
260
|
"""
|
238
261
|
print_header("EC2 Compute Cost Optimization Engine", "Enterprise Multi-Region Analysis Platform v1.0")
|
239
|
-
|
262
|
+
|
240
263
|
if not dry_run:
|
241
264
|
print_warning("⚠️ Dry-run disabled - This optimizer is READ-ONLY analysis only")
|
242
265
|
print_info("All EC2 operations require manual execution after review")
|
243
|
-
|
266
|
+
|
244
267
|
analysis_start_time = time.time()
|
245
|
-
|
268
|
+
|
246
269
|
try:
|
247
270
|
with create_progress_bar() as progress:
|
248
271
|
# Step 1: Multi-region EC2 instance discovery
|
249
272
|
discovery_task = progress.add_task("Discovering EC2 instances...", total=len(self.regions))
|
250
273
|
instances = await self._discover_ec2_instances_multi_region(progress, discovery_task)
|
251
|
-
|
274
|
+
|
252
275
|
if not instances:
|
253
276
|
print_warning("No EC2 instances found in specified regions")
|
254
277
|
return EC2ComputeOptimizerResults(
|
255
278
|
analyzed_regions=self.regions,
|
256
279
|
analysis_timestamp=datetime.now(),
|
257
|
-
execution_time_seconds=time.time() - analysis_start_time
|
280
|
+
execution_time_seconds=time.time() - analysis_start_time,
|
258
281
|
)
|
259
|
-
|
282
|
+
|
260
283
|
# Step 2: Usage metrics analysis via CloudWatch
|
261
284
|
metrics_task = progress.add_task("Analyzing usage metrics...", total=len(instances))
|
262
285
|
usage_metrics = await self._analyze_usage_metrics(instances, progress, metrics_task)
|
263
|
-
|
286
|
+
|
264
287
|
# Step 3: Cost analysis and pricing calculation
|
265
288
|
costing_task = progress.add_task("Calculating costs...", total=len(instances))
|
266
289
|
cost_analysis = await self._calculate_instance_costs(instances, progress, costing_task)
|
267
|
-
|
290
|
+
|
268
291
|
# Step 4: Comprehensive optimization analysis
|
269
292
|
optimization_task = progress.add_task("Calculating optimization potential...", total=len(instances))
|
270
293
|
optimization_results = await self._calculate_optimization_recommendations(
|
271
294
|
instances, usage_metrics, cost_analysis, progress, optimization_task
|
272
295
|
)
|
273
|
-
|
296
|
+
|
274
297
|
# Step 5: MCP validation
|
275
298
|
validation_task = progress.add_task("MCP validation...", total=1)
|
276
299
|
mcp_accuracy = await self._validate_with_mcp(optimization_results, progress, validation_task)
|
277
|
-
|
300
|
+
|
278
301
|
# Compile comprehensive results with cost breakdowns
|
279
302
|
results = self._compile_results(instances, optimization_results, mcp_accuracy, analysis_start_time)
|
280
|
-
|
303
|
+
|
281
304
|
# Display executive summary
|
282
305
|
self._display_executive_summary(results)
|
283
|
-
|
306
|
+
|
284
307
|
return results
|
285
|
-
|
308
|
+
|
286
309
|
except Exception as e:
|
287
310
|
print_error(f"EC2 compute optimization analysis failed: {e}")
|
288
311
|
logger.error(f"EC2 analysis error: {e}", exc_info=True)
|
289
312
|
raise
|
290
|
-
|
313
|
+
|
291
314
|
async def _discover_ec2_instances_multi_region(self, progress, task_id) -> List[EC2InstanceDetails]:
|
292
315
|
"""Discover EC2 instances across multiple regions."""
|
293
316
|
instances = []
|
294
|
-
|
317
|
+
|
295
318
|
for region in self.regions:
|
296
319
|
try:
|
297
|
-
ec2_client = self.session.client(
|
298
|
-
|
320
|
+
ec2_client = self.session.client("ec2", region_name=region)
|
321
|
+
|
299
322
|
# Get all EC2 instances in region
|
300
|
-
paginator = ec2_client.get_paginator(
|
323
|
+
paginator = ec2_client.get_paginator("describe_instances")
|
301
324
|
page_iterator = paginator.paginate()
|
302
|
-
|
325
|
+
|
303
326
|
for page in page_iterator:
|
304
|
-
for reservation in page.get(
|
305
|
-
for instance in reservation.get(
|
327
|
+
for reservation in page.get("Reservations", []):
|
328
|
+
for instance in reservation.get("Instances", []):
|
306
329
|
# Skip terminated instances
|
307
|
-
if instance.get(
|
330
|
+
if instance.get("State", {}).get("Name") == "terminated":
|
308
331
|
continue
|
309
|
-
|
332
|
+
|
310
333
|
# Extract tags
|
311
|
-
tags = {tag[
|
312
|
-
|
334
|
+
tags = {tag["Key"]: tag["Value"] for tag in instance.get("Tags", [])}
|
335
|
+
|
313
336
|
# Extract security groups
|
314
|
-
security_groups = [sg[
|
315
|
-
|
316
|
-
instances.append(
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
337
|
+
security_groups = [sg["GroupId"] for sg in instance.get("SecurityGroups", [])]
|
338
|
+
|
339
|
+
instances.append(
|
340
|
+
EC2InstanceDetails(
|
341
|
+
instance_id=instance["InstanceId"],
|
342
|
+
region=region,
|
343
|
+
instance_type=instance["InstanceType"],
|
344
|
+
state=instance["State"]["Name"],
|
345
|
+
availability_zone=instance["Placement"]["AvailabilityZone"],
|
346
|
+
launch_time=instance["LaunchTime"],
|
347
|
+
vpc_id=instance.get("VpcId"),
|
348
|
+
subnet_id=instance.get("SubnetId"),
|
349
|
+
public_ip_address=instance.get("PublicIpAddress"),
|
350
|
+
private_ip_address=instance.get("PrivateIpAddress"),
|
351
|
+
platform=instance.get("Platform"),
|
352
|
+
tags=tags,
|
353
|
+
security_groups=security_groups,
|
354
|
+
)
|
355
|
+
)
|
356
|
+
|
357
|
+
print_info(
|
358
|
+
f"Region {region}: {len([i for i in instances if i.region == region])} EC2 instances discovered"
|
359
|
+
)
|
360
|
+
|
334
361
|
except ClientError as e:
|
335
362
|
print_warning(f"Region {region}: Access denied or region unavailable - {e.response['Error']['Code']}")
|
336
363
|
except Exception as e:
|
337
364
|
print_error(f"Region {region}: Discovery error - {str(e)}")
|
338
|
-
|
365
|
+
|
339
366
|
progress.advance(task_id)
|
340
|
-
|
367
|
+
|
341
368
|
return instances
|
342
|
-
|
343
|
-
async def _analyze_usage_metrics(
|
369
|
+
|
370
|
+
async def _analyze_usage_metrics(
|
371
|
+
self, instances: List[EC2InstanceDetails], progress, task_id
|
372
|
+
) -> Dict[str, EC2UsageMetrics]:
|
344
373
|
"""Analyze EC2 instance usage metrics via CloudWatch."""
|
345
374
|
usage_metrics = {}
|
346
375
|
end_time = datetime.utcnow()
|
347
376
|
start_time = end_time - timedelta(days=self.analysis_period_days)
|
348
|
-
|
377
|
+
|
349
378
|
for instance in instances:
|
350
379
|
try:
|
351
380
|
# Skip analysis for non-running instances
|
352
|
-
if instance.state not in [
|
381
|
+
if instance.state not in ["running", "stopped"]:
|
353
382
|
progress.advance(task_id)
|
354
383
|
continue
|
355
|
-
|
356
|
-
cloudwatch = self.session.client(
|
357
|
-
|
384
|
+
|
385
|
+
cloudwatch = self.session.client("cloudwatch", region_name=instance.region)
|
386
|
+
|
358
387
|
# Get CPU utilization metrics
|
359
388
|
cpu_avg = await self._get_cloudwatch_metric(
|
360
|
-
cloudwatch, instance.instance_id,
|
389
|
+
cloudwatch, instance.instance_id, "CPUUtilization", start_time, end_time, "Average"
|
361
390
|
)
|
362
|
-
|
391
|
+
|
363
392
|
cpu_max = await self._get_cloudwatch_metric(
|
364
|
-
cloudwatch, instance.instance_id,
|
393
|
+
cloudwatch, instance.instance_id, "CPUUtilization", start_time, end_time, "Maximum"
|
365
394
|
)
|
366
|
-
|
395
|
+
|
367
396
|
# Get network metrics
|
368
397
|
network_in = await self._get_cloudwatch_metric(
|
369
|
-
cloudwatch, instance.instance_id,
|
398
|
+
cloudwatch, instance.instance_id, "NetworkIn", start_time, end_time, "Sum"
|
370
399
|
)
|
371
|
-
|
400
|
+
|
372
401
|
network_out = await self._get_cloudwatch_metric(
|
373
|
-
cloudwatch, instance.instance_id,
|
402
|
+
cloudwatch, instance.instance_id, "NetworkOut", start_time, end_time, "Sum"
|
374
403
|
)
|
375
|
-
|
404
|
+
|
376
405
|
# Get disk metrics
|
377
406
|
disk_read_ops = await self._get_cloudwatch_metric(
|
378
|
-
cloudwatch, instance.instance_id,
|
407
|
+
cloudwatch, instance.instance_id, "DiskReadOps", start_time, end_time, "Sum"
|
379
408
|
)
|
380
|
-
|
409
|
+
|
381
410
|
disk_write_ops = await self._get_cloudwatch_metric(
|
382
|
-
cloudwatch, instance.instance_id,
|
411
|
+
cloudwatch, instance.instance_id, "DiskWriteOps", start_time, end_time, "Sum"
|
383
412
|
)
|
384
|
-
|
413
|
+
|
385
414
|
# Calculate usage scores and recommendations
|
386
415
|
is_idle = cpu_avg < self.idle_cpu_threshold
|
387
416
|
is_underutilized = cpu_avg < self.underutilized_cpu_threshold and cpu_avg >= self.idle_cpu_threshold
|
388
|
-
|
417
|
+
|
389
418
|
rightsizing_recommendation = None
|
390
419
|
if is_underutilized and instance.instance_type in self.rightsizing_map:
|
391
420
|
rightsizing_recommendation = self.rightsizing_map[instance.instance_type]
|
392
|
-
|
421
|
+
|
393
422
|
usage_score = min(100, cpu_avg * 2) # Simple scoring: CPU utilization * 2
|
394
|
-
|
423
|
+
|
395
424
|
usage_metrics[instance.instance_id] = EC2UsageMetrics(
|
396
425
|
instance_id=instance.instance_id,
|
397
426
|
region=instance.region,
|
@@ -405,9 +434,9 @@ class EC2ComputeOptimizer:
|
|
405
434
|
is_idle=is_idle,
|
406
435
|
is_underutilized=is_underutilized,
|
407
436
|
rightsizing_recommendation=rightsizing_recommendation,
|
408
|
-
usage_score=usage_score
|
437
|
+
usage_score=usage_score,
|
409
438
|
)
|
410
|
-
|
439
|
+
|
411
440
|
except Exception as e:
|
412
441
|
print_warning(f"Metrics unavailable for {instance.instance_id}: {str(e)}")
|
413
442
|
# Create default metrics for instances without CloudWatch access
|
@@ -415,110 +444,107 @@ class EC2ComputeOptimizer:
|
|
415
444
|
instance_id=instance.instance_id,
|
416
445
|
region=instance.region,
|
417
446
|
analysis_period_days=self.analysis_period_days,
|
418
|
-
usage_score=50.0 # Neutral score
|
447
|
+
usage_score=50.0, # Neutral score
|
419
448
|
)
|
420
|
-
|
449
|
+
|
421
450
|
progress.advance(task_id)
|
422
|
-
|
451
|
+
|
423
452
|
return usage_metrics
|
424
|
-
|
425
|
-
async def _get_cloudwatch_metric(
|
426
|
-
|
453
|
+
|
454
|
+
async def _get_cloudwatch_metric(
|
455
|
+
self, cloudwatch, instance_id: str, metric_name: str, start_time: datetime, end_time: datetime, statistic: str
|
456
|
+
) -> float:
|
427
457
|
"""Get CloudWatch metric data for EC2 instance."""
|
428
458
|
try:
|
429
459
|
response = cloudwatch.get_metric_statistics(
|
430
|
-
Namespace=
|
460
|
+
Namespace="AWS/EC2",
|
431
461
|
MetricName=metric_name,
|
432
|
-
Dimensions=[
|
433
|
-
{
|
434
|
-
'Name': 'InstanceId',
|
435
|
-
'Value': instance_id
|
436
|
-
}
|
437
|
-
],
|
462
|
+
Dimensions=[{"Name": "InstanceId", "Value": instance_id}],
|
438
463
|
StartTime=start_time,
|
439
464
|
EndTime=end_time,
|
440
465
|
Period=86400, # Daily data points
|
441
|
-
Statistics=[statistic]
|
466
|
+
Statistics=[statistic],
|
442
467
|
)
|
443
|
-
|
468
|
+
|
444
469
|
# Calculate average over the analysis period
|
445
|
-
if statistic ==
|
446
|
-
total = sum(datapoint[statistic] for datapoint in response.get(
|
447
|
-
count = len(response.get(
|
470
|
+
if statistic == "Average":
|
471
|
+
total = sum(datapoint[statistic] for datapoint in response.get("Datapoints", []))
|
472
|
+
count = len(response.get("Datapoints", []))
|
448
473
|
return total / count if count > 0 else 0.0
|
449
474
|
else:
|
450
475
|
# For Sum and Maximum
|
451
|
-
if statistic ==
|
452
|
-
return max((datapoint[statistic] for datapoint in response.get(
|
476
|
+
if statistic == "Maximum":
|
477
|
+
return max((datapoint[statistic] for datapoint in response.get("Datapoints", [])), default=0.0)
|
453
478
|
else: # Sum
|
454
|
-
return sum(datapoint[statistic] for datapoint in response.get(
|
455
|
-
|
479
|
+
return sum(datapoint[statistic] for datapoint in response.get("Datapoints", []))
|
480
|
+
|
456
481
|
except Exception as e:
|
457
482
|
logger.warning(f"CloudWatch metric {metric_name} unavailable for {instance_id}: {e}")
|
458
483
|
return 0.0
|
459
|
-
|
460
|
-
async def _calculate_instance_costs(
|
484
|
+
|
485
|
+
async def _calculate_instance_costs(
|
486
|
+
self, instances: List[EC2InstanceDetails], progress, task_id
|
487
|
+
) -> Dict[str, Dict[str, float]]:
|
461
488
|
"""Calculate current costs for EC2 instances."""
|
462
489
|
cost_analysis = {}
|
463
|
-
|
490
|
+
|
464
491
|
for instance in instances:
|
465
492
|
try:
|
466
493
|
# Get hourly cost for instance type
|
467
494
|
hourly_cost = self.ec2_pricing.get(instance.instance_type, 0.10) # Default fallback
|
468
|
-
|
495
|
+
|
469
496
|
# Adjust for running vs stopped instances
|
470
|
-
if instance.state ==
|
497
|
+
if instance.state == "running":
|
471
498
|
monthly_cost = hourly_cost * 24 * 30.44 # Average days per month
|
472
499
|
annual_cost = hourly_cost * 24 * 365
|
473
|
-
elif instance.state ==
|
500
|
+
elif instance.state == "stopped":
|
474
501
|
# Stopped instances only pay for EBS storage, not compute
|
475
502
|
monthly_cost = 0.0
|
476
503
|
annual_cost = 0.0
|
477
504
|
else:
|
478
505
|
monthly_cost = 0.0
|
479
506
|
annual_cost = 0.0
|
480
|
-
|
507
|
+
|
481
508
|
cost_analysis[instance.instance_id] = {
|
482
|
-
|
483
|
-
|
484
|
-
|
509
|
+
"hourly_cost": hourly_cost,
|
510
|
+
"monthly_cost": monthly_cost,
|
511
|
+
"annual_cost": annual_cost,
|
485
512
|
}
|
486
|
-
|
513
|
+
|
487
514
|
except Exception as e:
|
488
515
|
print_warning(f"Cost calculation failed for {instance.instance_id}: {str(e)}")
|
489
|
-
cost_analysis[instance.instance_id] = {
|
490
|
-
|
491
|
-
'monthly_cost': 0.0,
|
492
|
-
'annual_cost': 0.0
|
493
|
-
}
|
494
|
-
|
516
|
+
cost_analysis[instance.instance_id] = {"hourly_cost": 0.10, "monthly_cost": 0.0, "annual_cost": 0.0}
|
517
|
+
|
495
518
|
progress.advance(task_id)
|
496
|
-
|
519
|
+
|
497
520
|
return cost_analysis
|
498
|
-
|
499
|
-
async def _calculate_optimization_recommendations(
|
500
|
-
|
501
|
-
|
502
|
-
|
503
|
-
|
521
|
+
|
522
|
+
async def _calculate_optimization_recommendations(
|
523
|
+
self,
|
524
|
+
instances: List[EC2InstanceDetails],
|
525
|
+
usage_metrics: Dict[str, EC2UsageMetrics],
|
526
|
+
cost_analysis: Dict[str, Dict[str, float]],
|
527
|
+
progress,
|
528
|
+
task_id,
|
529
|
+
) -> List[EC2OptimizationResult]:
|
504
530
|
"""Calculate comprehensive optimization recommendations and potential savings."""
|
505
531
|
optimization_results = []
|
506
|
-
|
532
|
+
|
507
533
|
for instance in instances:
|
508
534
|
try:
|
509
535
|
metrics = usage_metrics.get(instance.instance_id)
|
510
536
|
costs = cost_analysis.get(instance.instance_id, {})
|
511
|
-
|
537
|
+
|
512
538
|
# Extract cost information
|
513
|
-
hourly_cost = costs.get(
|
514
|
-
monthly_cost = costs.get(
|
515
|
-
annual_cost = costs.get(
|
516
|
-
|
539
|
+
hourly_cost = costs.get("hourly_cost", 0.0)
|
540
|
+
monthly_cost = costs.get("monthly_cost", 0.0)
|
541
|
+
annual_cost = costs.get("annual_cost", 0.0)
|
542
|
+
|
517
543
|
# Initialize optimization analysis
|
518
544
|
is_idle = metrics.is_idle if metrics else False
|
519
545
|
is_underutilized = metrics.is_underutilized if metrics else False
|
520
546
|
rightsizing_recommendation = metrics.rightsizing_recommendation if metrics else None
|
521
|
-
|
547
|
+
|
522
548
|
# Calculate potential savings
|
523
549
|
idle_monthly_savings = 0.0
|
524
550
|
idle_annual_savings = 0.0
|
@@ -526,49 +552,49 @@ class EC2ComputeOptimizer:
|
|
526
552
|
rightsizing_annual_savings = 0.0
|
527
553
|
lifecycle_monthly_savings = 0.0
|
528
554
|
lifecycle_annual_savings = 0.0
|
529
|
-
|
555
|
+
|
530
556
|
recommendation = "retain" # Default
|
531
557
|
risk_level = "low"
|
532
558
|
business_impact = "minimal"
|
533
|
-
|
559
|
+
|
534
560
|
# 1. Idle instance analysis
|
535
|
-
if is_idle and instance.state ==
|
561
|
+
if is_idle and instance.state == "running":
|
536
562
|
idle_monthly_savings = monthly_cost
|
537
563
|
idle_annual_savings = annual_cost
|
538
564
|
recommendation = "stop_idle"
|
539
565
|
business_impact = "cost_savings"
|
540
|
-
|
566
|
+
|
541
567
|
# 2. Rightsizing analysis
|
542
568
|
elif is_underutilized and rightsizing_recommendation:
|
543
569
|
# Calculate savings from downsizing
|
544
570
|
current_hourly = hourly_cost
|
545
571
|
new_hourly = self.ec2_pricing.get(rightsizing_recommendation, current_hourly * 0.5)
|
546
572
|
savings_hourly = current_hourly - new_hourly
|
547
|
-
|
573
|
+
|
548
574
|
if savings_hourly > 0:
|
549
575
|
rightsizing_monthly_savings = savings_hourly * 24 * 30.44
|
550
576
|
rightsizing_annual_savings = savings_hourly * 24 * 365
|
551
577
|
recommendation = "rightsize"
|
552
578
|
risk_level = "medium"
|
553
579
|
business_impact = "performance_optimization"
|
554
|
-
|
580
|
+
|
555
581
|
# 3. Lifecycle optimization (simplified analysis)
|
556
|
-
if instance.state ==
|
582
|
+
if instance.state == "running" and not is_idle:
|
557
583
|
# Potential Reserved Instance savings (conservative estimate)
|
558
584
|
lifecycle_monthly_savings = monthly_cost * 0.3 # 30% RI savings estimate
|
559
585
|
lifecycle_annual_savings = annual_cost * 0.3
|
560
|
-
|
586
|
+
|
561
587
|
# Determine primary recommendation
|
562
588
|
total_monthly_savings = max(idle_monthly_savings, rightsizing_monthly_savings)
|
563
589
|
if lifecycle_monthly_savings > total_monthly_savings and total_monthly_savings == 0:
|
564
590
|
total_monthly_savings = lifecycle_monthly_savings
|
565
591
|
recommendation = "lifecycle_optimize"
|
566
592
|
business_impact = "reserved_instances"
|
567
|
-
|
593
|
+
|
568
594
|
# Safety and dependency analysis
|
569
595
|
has_tags = len(instance.tags) > 0
|
570
|
-
has_lifetime_tag =
|
571
|
-
|
596
|
+
has_lifetime_tag = "Lifetime" in instance.tags or "lifetime" in instance.tags
|
597
|
+
|
572
598
|
# Calculate dependency score based on various factors
|
573
599
|
dependency_score = 0.0
|
574
600
|
if instance.public_ip_address:
|
@@ -577,111 +603,118 @@ class EC2ComputeOptimizer:
|
|
577
603
|
dependency_score += 0.2 # Multiple security groups
|
578
604
|
if has_tags:
|
579
605
|
dependency_score += 0.2 # Has tags (likely managed)
|
580
|
-
|
606
|
+
|
581
607
|
# Adjust risk level based on dependencies
|
582
608
|
if dependency_score > 0.5:
|
583
609
|
risk_level = "medium" if risk_level == "low" else "high"
|
584
|
-
|
585
|
-
optimization_results.append(
|
586
|
-
|
587
|
-
|
588
|
-
|
589
|
-
|
590
|
-
|
591
|
-
|
592
|
-
|
593
|
-
|
594
|
-
|
595
|
-
|
596
|
-
|
597
|
-
|
598
|
-
|
599
|
-
|
600
|
-
|
601
|
-
|
602
|
-
|
603
|
-
|
604
|
-
|
605
|
-
|
606
|
-
|
607
|
-
|
608
|
-
|
609
|
-
|
610
|
-
|
611
|
-
|
612
|
-
|
613
|
-
|
614
|
-
|
615
|
-
|
616
|
-
|
617
|
-
|
618
|
-
|
619
|
-
|
620
|
-
|
621
|
-
|
610
|
+
|
611
|
+
optimization_results.append(
|
612
|
+
EC2OptimizationResult(
|
613
|
+
instance_id=instance.instance_id,
|
614
|
+
region=instance.region,
|
615
|
+
availability_zone=instance.availability_zone,
|
616
|
+
instance_type=instance.instance_type,
|
617
|
+
instance_state=instance.state,
|
618
|
+
launch_time=instance.launch_time,
|
619
|
+
platform=instance.platform,
|
620
|
+
usage_metrics=metrics,
|
621
|
+
hourly_cost=hourly_cost,
|
622
|
+
monthly_cost=monthly_cost,
|
623
|
+
annual_cost=annual_cost,
|
624
|
+
is_idle=is_idle,
|
625
|
+
idle_monthly_savings=idle_monthly_savings,
|
626
|
+
idle_annual_savings=idle_annual_savings,
|
627
|
+
is_underutilized=is_underutilized,
|
628
|
+
rightsizing_recommendation=rightsizing_recommendation,
|
629
|
+
rightsizing_monthly_savings=rightsizing_monthly_savings,
|
630
|
+
rightsizing_annual_savings=rightsizing_annual_savings,
|
631
|
+
lifecycle_monthly_savings=lifecycle_monthly_savings,
|
632
|
+
lifecycle_annual_savings=lifecycle_annual_savings,
|
633
|
+
optimization_recommendation=recommendation,
|
634
|
+
risk_level=risk_level,
|
635
|
+
business_impact=business_impact,
|
636
|
+
total_monthly_savings=total_monthly_savings,
|
637
|
+
total_annual_savings=total_monthly_savings * 12,
|
638
|
+
has_tags=has_tags,
|
639
|
+
has_lifetime_tag=has_lifetime_tag,
|
640
|
+
dependency_score=dependency_score,
|
641
|
+
safety_checks={
|
642
|
+
"has_tags": has_tags,
|
643
|
+
"has_lifetime_tag": has_lifetime_tag,
|
644
|
+
"has_public_ip": instance.public_ip_address is not None,
|
645
|
+
"low_dependency": dependency_score < 0.3,
|
646
|
+
},
|
647
|
+
)
|
648
|
+
)
|
649
|
+
|
622
650
|
except Exception as e:
|
623
651
|
print_error(f"Optimization calculation failed for {instance.instance_id}: {str(e)}")
|
624
|
-
|
652
|
+
|
625
653
|
progress.advance(task_id)
|
626
|
-
|
654
|
+
|
627
655
|
return optimization_results
|
628
|
-
|
629
|
-
async def _validate_with_mcp(self, optimization_results: List[EC2OptimizationResult],
|
630
|
-
progress, task_id) -> float:
|
656
|
+
|
657
|
+
async def _validate_with_mcp(self, optimization_results: List[EC2OptimizationResult], progress, task_id) -> float:
|
631
658
|
"""Validate optimization results with embedded MCP validator."""
|
632
659
|
try:
|
633
660
|
# Prepare validation data in FinOps format
|
634
661
|
validation_data = {
|
635
|
-
|
636
|
-
|
637
|
-
|
638
|
-
|
639
|
-
|
662
|
+
"total_annual_cost": sum(result.annual_cost for result in optimization_results),
|
663
|
+
"potential_annual_savings": sum(result.total_annual_savings for result in optimization_results),
|
664
|
+
"instances_analyzed": len(optimization_results),
|
665
|
+
"regions_analyzed": list(set(result.region for result in optimization_results)),
|
666
|
+
"analysis_timestamp": datetime.now().isoformat(),
|
640
667
|
}
|
641
|
-
|
668
|
+
|
642
669
|
# Initialize MCP validator if profile is available
|
643
670
|
if self.profile_name:
|
644
671
|
mcp_validator = EmbeddedMCPValidator([self.profile_name])
|
645
672
|
validation_results = await mcp_validator.validate_cost_data_async(validation_data)
|
646
|
-
accuracy = validation_results.get(
|
647
|
-
|
673
|
+
accuracy = validation_results.get("total_accuracy", 0.0)
|
674
|
+
|
648
675
|
if accuracy >= 99.5:
|
649
676
|
print_success(f"MCP Validation: {accuracy:.1f}% accuracy achieved (target: ≥99.5%)")
|
650
677
|
else:
|
651
678
|
print_warning(f"MCP Validation: {accuracy:.1f}% accuracy (target: ≥99.5%)")
|
652
|
-
|
679
|
+
|
653
680
|
progress.advance(task_id)
|
654
681
|
return accuracy
|
655
682
|
else:
|
656
683
|
print_info("MCP validation skipped - no profile specified")
|
657
684
|
progress.advance(task_id)
|
658
685
|
return 0.0
|
659
|
-
|
686
|
+
|
660
687
|
except Exception as e:
|
661
688
|
print_warning(f"MCP validation failed: {str(e)}")
|
662
689
|
progress.advance(task_id)
|
663
690
|
return 0.0
|
664
|
-
|
665
|
-
def _compile_results(
|
666
|
-
|
667
|
-
|
691
|
+
|
692
|
+
def _compile_results(
|
693
|
+
self,
|
694
|
+
instances: List[EC2InstanceDetails],
|
695
|
+
optimization_results: List[EC2OptimizationResult],
|
696
|
+
mcp_accuracy: float,
|
697
|
+
analysis_start_time: float,
|
698
|
+
) -> EC2ComputeOptimizerResults:
|
668
699
|
"""Compile comprehensive EC2 compute optimization results."""
|
669
|
-
|
700
|
+
|
670
701
|
# Count instances by state and optimization opportunity
|
671
|
-
running_instances = len([i for i in instances if i.state ==
|
672
|
-
stopped_instances = len([i for i in instances if i.state ==
|
702
|
+
running_instances = len([i for i in instances if i.state == "running"])
|
703
|
+
stopped_instances = len([i for i in instances if i.state == "stopped"])
|
673
704
|
idle_instances = len([r for r in optimization_results if r.is_idle])
|
674
705
|
underutilized_instances = len([r for r in optimization_results if r.is_underutilized])
|
675
|
-
|
706
|
+
|
676
707
|
# Calculate cost breakdowns
|
677
708
|
total_monthly_cost = sum(result.monthly_cost for result in optimization_results)
|
678
709
|
total_annual_cost = total_monthly_cost * 12
|
679
|
-
|
710
|
+
|
680
711
|
idle_potential_monthly_savings = sum(result.idle_monthly_savings for result in optimization_results)
|
681
|
-
rightsizing_potential_monthly_savings = sum(
|
712
|
+
rightsizing_potential_monthly_savings = sum(
|
713
|
+
result.rightsizing_monthly_savings for result in optimization_results
|
714
|
+
)
|
682
715
|
lifecycle_potential_monthly_savings = sum(result.lifecycle_monthly_savings for result in optimization_results)
|
683
716
|
total_potential_monthly_savings = sum(result.total_monthly_savings for result in optimization_results)
|
684
|
-
|
717
|
+
|
685
718
|
return EC2ComputeOptimizerResults(
|
686
719
|
total_instances=len(instances),
|
687
720
|
running_instances=running_instances,
|
@@ -702,12 +735,12 @@ class EC2ComputeOptimizer:
|
|
702
735
|
total_potential_annual_savings=total_potential_monthly_savings * 12,
|
703
736
|
execution_time_seconds=time.time() - analysis_start_time,
|
704
737
|
mcp_validation_accuracy=mcp_accuracy,
|
705
|
-
analysis_timestamp=datetime.now()
|
738
|
+
analysis_timestamp=datetime.now(),
|
706
739
|
)
|
707
|
-
|
740
|
+
|
708
741
|
def _display_executive_summary(self, results: EC2ComputeOptimizerResults) -> None:
|
709
742
|
"""Display executive summary with Rich CLI formatting."""
|
710
|
-
|
743
|
+
|
711
744
|
# Executive Summary Panel
|
712
745
|
summary_content = f"""
|
713
746
|
💻 Total EC2 Instances: {results.total_instances}
|
@@ -722,22 +755,20 @@ class EC2ComputeOptimizer:
|
|
722
755
|
• Rightsizing: {format_cost(results.rightsizing_potential_annual_savings)}
|
723
756
|
• Lifecycle (RI): {format_cost(results.lifecycle_potential_annual_savings)}
|
724
757
|
|
725
|
-
🌍 Regions Analyzed: {
|
758
|
+
🌍 Regions Analyzed: {", ".join(results.analyzed_regions)}
|
726
759
|
⚡ Analysis Time: {results.execution_time_seconds:.2f}s
|
727
760
|
✅ MCP Accuracy: {results.mcp_validation_accuracy:.1f}%
|
728
761
|
"""
|
729
|
-
|
730
|
-
console.print(
|
731
|
-
|
732
|
-
|
733
|
-
|
734
|
-
))
|
735
|
-
|
736
|
-
# Detailed Results Table
|
737
|
-
table = create_table(
|
738
|
-
title="EC2 Instance Optimization Recommendations"
|
762
|
+
|
763
|
+
console.print(
|
764
|
+
create_panel(
|
765
|
+
summary_content.strip(), title="🏆 EC2 Compute Optimization Executive Summary", border_style="green"
|
766
|
+
)
|
739
767
|
)
|
740
|
-
|
768
|
+
|
769
|
+
# Detailed Results Table
|
770
|
+
table = create_table(title="EC2 Instance Optimization Recommendations")
|
771
|
+
|
741
772
|
table.add_column("Instance ID", style="cyan", no_wrap=True)
|
742
773
|
table.add_column("Region", style="dim")
|
743
774
|
table.add_column("Type", justify="center")
|
@@ -746,39 +777,27 @@ class EC2ComputeOptimizer:
|
|
746
777
|
table.add_column("Potential Savings", justify="right", style="green")
|
747
778
|
table.add_column("Recommendation", justify="center")
|
748
779
|
table.add_column("Risk", justify="center")
|
749
|
-
|
780
|
+
|
750
781
|
# Sort by potential savings (descending)
|
751
|
-
sorted_results = sorted(
|
752
|
-
|
753
|
-
key=lambda x: x.total_annual_savings,
|
754
|
-
reverse=True
|
755
|
-
)
|
756
|
-
|
782
|
+
sorted_results = sorted(results.optimization_results, key=lambda x: x.total_annual_savings, reverse=True)
|
783
|
+
|
757
784
|
# Show top 15 results to avoid overwhelming output
|
758
785
|
display_results = sorted_results[:15]
|
759
|
-
|
786
|
+
|
760
787
|
for result in display_results:
|
761
788
|
# Status indicators for recommendations
|
762
789
|
rec_color = {
|
763
790
|
"stop_idle": "red",
|
764
791
|
"rightsize": "yellow",
|
765
792
|
"lifecycle_optimize": "blue",
|
766
|
-
"retain": "green"
|
793
|
+
"retain": "green",
|
767
794
|
}.get(result.optimization_recommendation, "white")
|
768
|
-
|
769
|
-
risk_indicator = {
|
770
|
-
|
771
|
-
"medium": "🟡",
|
772
|
-
"high": "🔴"
|
773
|
-
}.get(result.risk_level, "⚪")
|
774
|
-
|
795
|
+
|
796
|
+
risk_indicator = {"low": "🟢", "medium": "🟡", "high": "🔴"}.get(result.risk_level, "⚪")
|
797
|
+
|
775
798
|
# Format state
|
776
|
-
state_indicator = {
|
777
|
-
|
778
|
-
"stopped": "🔴",
|
779
|
-
"stopping": "🟡"
|
780
|
-
}.get(result.instance_state, "⚪")
|
781
|
-
|
799
|
+
state_indicator = {"running": "🟢", "stopped": "🔴", "stopping": "🟡"}.get(result.instance_state, "⚪")
|
800
|
+
|
782
801
|
table.add_row(
|
783
802
|
result.instance_id[-8:], # Show last 8 chars
|
784
803
|
result.region,
|
@@ -787,38 +806,36 @@ class EC2ComputeOptimizer:
|
|
787
806
|
format_cost(result.annual_cost),
|
788
807
|
format_cost(result.total_annual_savings) if result.total_annual_savings > 0 else "-",
|
789
808
|
f"[{rec_color}]{result.optimization_recommendation.replace('_', ' ').title()}[/]",
|
790
|
-
f"{risk_indicator} {result.risk_level.title()}"
|
809
|
+
f"{risk_indicator} {result.risk_level.title()}",
|
791
810
|
)
|
792
|
-
|
811
|
+
|
793
812
|
if len(sorted_results) > 15:
|
794
813
|
table.add_row(
|
795
|
-
"...", "...", "...", "...", "...", "...",
|
796
|
-
f"[dim]+{len(sorted_results) - 15} more instances[/]", "..."
|
814
|
+
"...", "...", "...", "...", "...", "...", f"[dim]+{len(sorted_results) - 15} more instances[/]", "..."
|
797
815
|
)
|
798
|
-
|
816
|
+
|
799
817
|
console.print(table)
|
800
818
|
|
801
819
|
|
802
820
|
# CLI Integration for enterprise runbooks commands
|
803
821
|
@click.command()
|
804
|
-
@click.option(
|
805
|
-
@click.option(
|
806
|
-
@click.option(
|
807
|
-
@click.option(
|
808
|
-
help='CloudWatch analysis period in days')
|
822
|
+
@click.option("--profile", help="AWS profile name (3-tier priority: User > Environment > Default)")
|
823
|
+
@click.option("--regions", multiple=True, help="AWS regions to analyze (space-separated)")
|
824
|
+
@click.option("--dry-run/--no-dry-run", default=True, help="Execute in dry-run mode (READ-ONLY analysis)")
|
825
|
+
@click.option("--usage-threshold-days", type=int, default=14, help="CloudWatch analysis period in days")
|
809
826
|
def compute_optimizer(profile, regions, dry_run, usage_threshold_days):
|
810
827
|
"""
|
811
828
|
EC2 Compute Cost Optimizer - Enterprise Multi-Region Analysis
|
812
|
-
|
829
|
+
|
813
830
|
Comprehensive EC2 cost optimization combining multiple strategies:
|
814
831
|
• Idle instance detection and automated stop/terminate recommendations
|
815
832
|
• Usage-based rightsizing with CloudWatch metrics integration
|
816
833
|
• Instance lifecycle optimization (Reserved Instances, Spot instances)
|
817
|
-
|
834
|
+
|
818
835
|
Part of $132,720+ annual savings methodology targeting $2M-$8M compute optimization.
|
819
|
-
|
836
|
+
|
820
837
|
SAFETY: READ-ONLY analysis only - no resource modifications.
|
821
|
-
|
838
|
+
|
822
839
|
Examples:
|
823
840
|
runbooks finops compute --analyze
|
824
841
|
runbooks finops compute --profile my-profile --regions us-east-1 us-west-2
|
@@ -826,18 +843,15 @@ def compute_optimizer(profile, regions, dry_run, usage_threshold_days):
|
|
826
843
|
"""
|
827
844
|
try:
|
828
845
|
# Initialize optimizer
|
829
|
-
optimizer = EC2ComputeOptimizer(
|
830
|
-
|
831
|
-
regions=list(regions) if regions else None
|
832
|
-
)
|
833
|
-
|
846
|
+
optimizer = EC2ComputeOptimizer(profile_name=profile, regions=list(regions) if regions else None)
|
847
|
+
|
834
848
|
# Override analysis period if specified
|
835
849
|
if usage_threshold_days != 14:
|
836
850
|
optimizer.analysis_period_days = usage_threshold_days
|
837
|
-
|
851
|
+
|
838
852
|
# Execute comprehensive analysis
|
839
853
|
results = asyncio.run(optimizer.analyze_ec2_compute(dry_run=dry_run))
|
840
|
-
|
854
|
+
|
841
855
|
# Display final success message
|
842
856
|
if results.total_potential_annual_savings > 0:
|
843
857
|
savings_breakdown = []
|
@@ -847,12 +861,14 @@ def compute_optimizer(profile, regions, dry_run, usage_threshold_days):
|
|
847
861
|
savings_breakdown.append(f"Rightsizing: {format_cost(results.rightsizing_potential_annual_savings)}")
|
848
862
|
if results.lifecycle_potential_annual_savings > 0:
|
849
863
|
savings_breakdown.append(f"Lifecycle: {format_cost(results.lifecycle_potential_annual_savings)}")
|
850
|
-
|
851
|
-
print_success(
|
864
|
+
|
865
|
+
print_success(
|
866
|
+
f"Analysis complete: {format_cost(results.total_potential_annual_savings)} potential annual savings"
|
867
|
+
)
|
852
868
|
print_info(f"Optimization strategies: {' | '.join(savings_breakdown)}")
|
853
869
|
else:
|
854
870
|
print_info("Analysis complete: All EC2 instances are optimally configured")
|
855
|
-
|
871
|
+
|
856
872
|
except KeyboardInterrupt:
|
857
873
|
print_warning("Analysis interrupted by user")
|
858
874
|
raise click.Abort()
|
@@ -861,5 +877,5 @@ def compute_optimizer(profile, regions, dry_run, usage_threshold_days):
|
|
861
877
|
raise click.Abort()
|
862
878
|
|
863
879
|
|
864
|
-
if __name__ ==
|
865
|
-
compute_optimizer()
|
880
|
+
if __name__ == "__main__":
|
881
|
+
compute_optimizer()
|