runbooks 0.9.7__py3-none-any.whl → 0.9.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- runbooks/__init__.py +1 -1
- runbooks/common/mcp_integration.py +174 -0
- runbooks/common/performance_monitor.py +4 -4
- runbooks/common/rich_utils.py +3 -0
- runbooks/enterprise/__init__.py +18 -10
- runbooks/enterprise/security.py +708 -0
- runbooks/finops/enhanced_dashboard_runner.py +2 -1
- runbooks/finops/finops_dashboard.py +322 -11
- runbooks/finops/markdown_exporter.py +226 -0
- runbooks/finops/optimizer.py +2 -0
- runbooks/finops/single_dashboard.py +16 -16
- runbooks/finops/vpc_cleanup_exporter.py +328 -0
- runbooks/finops/vpc_cleanup_optimizer.py +1318 -0
- runbooks/main.py +384 -15
- runbooks/operate/vpc_operations.py +8 -2
- runbooks/vpc/__init__.py +12 -0
- runbooks/vpc/cleanup_wrapper.py +757 -0
- runbooks/vpc/cost_engine.py +527 -3
- runbooks/vpc/networking_wrapper.py +29 -29
- runbooks/vpc/runbooks_adapter.py +479 -0
- runbooks/vpc/unified_scenarios.py +3199 -0
- runbooks/vpc/vpc_cleanup_integration.py +2629 -0
- {runbooks-0.9.7.dist-info → runbooks-0.9.9.dist-info}/METADATA +1 -1
- {runbooks-0.9.7.dist-info → runbooks-0.9.9.dist-info}/RECORD +28 -21
- {runbooks-0.9.7.dist-info → runbooks-0.9.9.dist-info}/WHEEL +0 -0
- {runbooks-0.9.7.dist-info → runbooks-0.9.9.dist-info}/entry_points.txt +0 -0
- {runbooks-0.9.7.dist-info → runbooks-0.9.9.dist-info}/licenses/LICENSE +0 -0
- {runbooks-0.9.7.dist-info → runbooks-0.9.9.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,2629 @@
|
|
1
|
+
"""
|
2
|
+
VPC Cleanup Integration Module - Enterprise Framework Integration
|
3
|
+
|
4
|
+
This module integrates VPC cleanup operations with the existing runbooks framework
|
5
|
+
architecture, providing scalable enterprise VPC operations with comprehensive
|
6
|
+
dependency analysis and multi-account support.
|
7
|
+
"""
|
8
|
+
|
9
|
+
import asyncio
|
10
|
+
import concurrent.futures
|
11
|
+
import json
|
12
|
+
import logging
|
13
|
+
import time
|
14
|
+
from collections import defaultdict
|
15
|
+
from dataclasses import dataclass, field
|
16
|
+
from datetime import datetime, timedelta
|
17
|
+
from enum import Enum
|
18
|
+
from pathlib import Path
|
19
|
+
from typing import Any, Dict, List, Optional, Set, Tuple
|
20
|
+
|
21
|
+
import boto3
|
22
|
+
from botocore.exceptions import ClientError
|
23
|
+
from rich.console import Console
|
24
|
+
from rich.panel import Panel
|
25
|
+
from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, TimeRemainingColumn
|
26
|
+
from rich.table import Table
|
27
|
+
from rich.tree import Tree
|
28
|
+
|
29
|
+
from runbooks.common.profile_utils import create_operational_session
|
30
|
+
from runbooks.common.performance_monitor import get_performance_benchmark
|
31
|
+
from runbooks.common.enhanced_exception_handler import create_exception_handler, ErrorContext
|
32
|
+
from .cost_engine import NetworkingCostEngine
|
33
|
+
from .networking_wrapper import VPCNetworkingWrapper
|
34
|
+
|
35
|
+
logger = logging.getLogger(__name__)
|
36
|
+
|
37
|
+
|
38
|
+
@dataclass
class PerformanceMetrics:
    """Counters and timings collected while analysing VPCs for cleanup."""

    # Counters
    total_vpcs_analyzed: int = 0
    parallel_operations: int = 0
    cache_hits: int = 0
    api_calls_made: int = 0
    api_calls_cached: int = 0

    # Timings (seconds, as produced by time.time() differences)
    total_execution_time: float = 0.0
    average_vpc_analysis_time: float = 0.0
    dependency_analysis_time: float = 0.0

    # Reliability counters
    error_count: int = 0
    recovery_success_count: int = 0

    def get_cache_hit_ratio(self) -> float:
        """Return cached calls divided by all calls, or 0.0 when no calls were recorded."""
        all_calls = self.api_calls_made + self.api_calls_cached
        if all_calls <= 0:
            return 0.0
        return self.api_calls_cached / all_calls

    def get_error_rate(self) -> float:
        """Return errors per analysed VPC; the divisor is clamped to at least 1."""
        denominator = max(self.total_vpcs_analyzed, 1)
        return self.error_count / denominator
|
60
|
+
|
61
|
+
|
62
|
+
@dataclass
class CircuitBreakerState:
    """Minimal circuit breaker: tracks failures and decides whether a call may proceed."""

    failure_count: int = 0
    last_failure_time: Optional[float] = None  # time.time() of the most recent failure
    state: str = "closed"  # closed, open, half-open
    failure_threshold: int = 5  # failures needed to trip the breaker
    recovery_timeout: int = 60  # seconds to wait before probing again

    def should_allow_request(self) -> bool:
        """Return True unless the breaker is open and the recovery timeout has not elapsed.

        An open breaker whose timeout has elapsed is moved to "half-open" as a
        side effect and the request is allowed.
        """
        if self.state != "open":
            # Both "closed" and "half-open" let requests through.
            return True

        elapsed = time.time() - (self.last_failure_time or 0)
        if elapsed > self.recovery_timeout:
            self.state = "half-open"
            return True
        return False

    def record_success(self):
        """Reset the failure counter and close the breaker."""
        self.failure_count = 0
        self.state = "closed"

    def record_failure(self):
        """Count a failure, stamp its time, and open the breaker at the threshold."""
        self.failure_count += 1
        self.last_failure_time = time.time()
        tripped = self.failure_count >= self.failure_threshold
        if tripped:
            self.state = "open"
|
94
|
+
|
95
|
+
|
96
|
+
@dataclass
class VPCAnalysisCache:
    """In-memory, TTL-based cache of per-VPC analysis data keyed by VPC id."""

    vpc_data: Dict[str, Any] = field(default_factory=dict)
    dependency_cache: Dict[str, List] = field(default_factory=dict)
    cost_cache: Dict[str, float] = field(default_factory=dict)
    # One timestamp per VPC id; it is the only freshness marker is_valid() consults.
    last_updated: Dict[str, float] = field(default_factory=dict)
    cache_ttl: int = 300  # 5 minutes

    def is_valid(self, vpc_id: str) -> bool:
        """Return True when *vpc_id* has a timestamp younger than ``cache_ttl`` seconds."""
        stamped_at = self.last_updated.get(vpc_id)
        if vpc_id not in self.last_updated:
            return False
        age = time.time() - stamped_at
        return age < self.cache_ttl

    def get_vpc_data(self, vpc_id: str) -> Optional[Any]:
        """Return the cached VPC record, or None when it is missing or expired."""
        if not self.is_valid(vpc_id):
            return None
        return self.vpc_data.get(vpc_id)

    def cache_vpc_data(self, vpc_id: str, data: Any):
        """Store *data* for *vpc_id* and refresh its timestamp."""
        self.vpc_data[vpc_id] = data
        self.last_updated[vpc_id] = time.time()
|
121
|
+
|
122
|
+
|
123
|
+
class VPCCleanupRisk(Enum):
    """Risk classification attached to a VPC cleanup candidate.

    Member values are the capitalised display labels.
    """

    LOW = "Low"
    MEDIUM = "Medium"
    HIGH = "High"
    CRITICAL = "Critical"
|
129
|
+
|
130
|
+
|
131
|
+
class VPCCleanupPhase(Enum):
    """Execution phase a VPC cleanup candidate is assigned to.

    Member values are the human-readable phase labels.
    """

    IMMEDIATE = "Immediate Deletion"
    INVESTIGATION = "Investigation Required"
    GOVERNANCE = "Governance Approval"
    COMPLEX = "Complex Migration"
|
137
|
+
|
138
|
+
|
139
|
+
@dataclass
class VPCDependency:
    """A single resource that a VPC depends on (or that depends on the VPC)."""

    resource_type: str            # e.g. 'NetworkInterface' (the value counted for eni_count)
    resource_id: str
    resource_name: Optional[str]  # None when the resource has no name
    dependency_level: int         # 1=internal, 2=external, 3=control_plane
    blocking: bool                # counted into the candidate's blocking_dependencies
    deletion_order: int
    api_method: str
    description: str
|
150
|
+
|
151
|
+
|
152
|
+
@dataclass
class VPCCleanupCandidate:
    """One VPC under consideration for cleanup, plus everything the analysis learned about it."""

    # --- Identity (required) ---
    account_id: str
    vpc_id: str
    vpc_name: Optional[str]  # value of the 'Name' tag, if any
    cidr_block: str
    is_default: bool
    region: str

    # --- Dependency analysis ---
    dependencies: List[VPCDependency] = field(default_factory=list)
    eni_count: int = 0
    blocking_dependencies: int = 0

    # --- Risk assessment ---
    risk_level: VPCCleanupRisk = VPCCleanupRisk.LOW
    cleanup_phase: VPCCleanupPhase = VPCCleanupPhase.IMMEDIATE

    # --- Financial impact ---
    monthly_cost: float = 0.0
    annual_savings: float = 0.0

    # --- Metadata ---
    tags: Dict[str, str] = field(default_factory=dict)
    flow_logs_enabled: bool = False
    iac_managed: bool = False
    iac_source: Optional[str] = None

    # --- Business impact ---
    approval_required: bool = False
    stakeholders: List[str] = field(default_factory=list)
    implementation_timeline: str = "1-2 weeks"
|
185
|
+
|
186
|
+
|
187
|
+
class VPCCleanupFramework:
|
188
|
+
"""
|
189
|
+
Enterprise VPC cleanup framework integrated with runbooks architecture
|
190
|
+
|
191
|
+
Provides comprehensive VPC analysis, dependency mapping, and cleanup coordination
|
192
|
+
with multi-account support and enterprise safety controls.
|
193
|
+
"""
|
194
|
+
|
195
|
+
def __init__(
    self,
    profile: Optional[str] = None,
    region: str = "us-east-1",
    console: Optional[Console] = None,
    safety_mode: bool = True,
    enable_parallel_processing: bool = True,
    max_workers: int = 10,
    enable_caching: bool = True
):
    """
    Set up configuration, reliability helpers, AWS session and worker pool.

    Args:
        profile: AWS profile name; a session is only created when this is given
        region: AWS region used for the EC2 client and recorded on candidates
        console: Rich console for output (a new Console is created when omitted)
        safety_mode: Stored on the instance as ``safety_mode``
        enable_parallel_processing: When true, a ThreadPoolExecutor is created
        max_workers: Size of that thread pool
        enable_caching: When true, a VPCAnalysisCache is created
    """
    # --- Plain configuration ---
    self.profile = profile
    self.region = region
    self.console = console or Console()
    self.safety_mode = safety_mode
    self.enable_parallel_processing = enable_parallel_processing
    self.max_workers = max_workers
    self.enable_caching = enable_caching

    # --- Performance and reliability helpers ---
    self.performance_metrics = PerformanceMetrics()
    self.performance_benchmark = get_performance_benchmark("vpc")
    # One breaker per key, created lazily on first lookup.
    self.circuit_breakers = defaultdict(CircuitBreakerState)
    if enable_caching:
        self.analysis_cache = VPCAnalysisCache()
    else:
        self.analysis_cache = None
    self.exception_handler = create_exception_handler("vpc", enable_rich_output=True)

    # --- AWS session (stays None without a profile or when creation fails) ---
    self.session = None
    if profile:
        try:
            self.session = create_operational_session(profile=profile)
        except Exception as e:
            failure_context = ErrorContext(
                module_name="vpc",
                operation="session_initialization",
                aws_profile=profile,
                aws_region=region
            )
            self.exception_handler.handle_exception(e, failure_context)
            logger.error(f"Failed to create session with profile {profile}: {e}")

    # --- Collaborators ---
    # The wrapper receives the caller's console argument as given (possibly None).
    self.vpc_wrapper = VPCNetworkingWrapper(
        profile=profile,
        region=region,
        console=console
    )

    # The cost engine needs a session, so it is skipped when none exists.
    if self.session:
        self.cost_engine = NetworkingCostEngine(session=self.session)
    else:
        self.cost_engine = None

    # --- Result storage ---
    self.cleanup_candidates: List[VPCCleanupCandidate] = []
    self.analysis_results: Dict[str, Any] = {}

    # --- Worker pool for parallel processing ---
    if self.enable_parallel_processing:
        self.executor = concurrent.futures.ThreadPoolExecutor(max_workers=self.max_workers)
    else:
        self.executor = None

    # --- Rollback procedures storage ---
    self.rollback_procedures: List[Dict[str, Any]] = []
|
270
|
+
|
271
|
+
def analyze_vpc_cleanup_candidates(
    self,
    vpc_ids: Optional[List[str]] = None,
    account_profiles: Optional[List[str]] = None
) -> List[VPCCleanupCandidate]:
    """
    Analyze VPC cleanup candidates and record per-run performance metrics.

    Targets stated for this operation (checked by ``_validate_performance_targets``):
    - <30s total execution time for VPC cleanup analysis
    - ≥99.5% MCP validation accuracy maintained
    - 60%+ parallel efficiency over sequential processing
    - >99% reliability with circuit breaker protection

    Args:
        vpc_ids: Specific VPC IDs to analyze (optional; all VPCs when omitted)
        account_profiles: Profiles for multi-account analysis. The multi-account
            path is only taken when more than one profile is supplied; otherwise
            the instance's own session is used.

    Returns:
        List of VPC cleanup candidates with analysis results

    Raises:
        Exception: the original error is re-raised when the exception handler
            reports that a retry is not possible.
    """
    with self.performance_benchmark.measure_operation("vpc_cleanup_analysis", show_progress=True) as metrics:
        start_time = time.time()

        self.console.print(Panel.fit("🔍 Analyzing VPC Cleanup Candidates with Performance Optimization", style="bold blue"))

        # Enhanced pre-analysis health and performance check
        self._perform_comprehensive_health_check()

        try:
            # Start every run from a clean slate. The counters below feed
            # per-run ratios (cache hit ratio, error rate), so leaving any of
            # them un-reset would mix numbers from earlier runs into this one.
            perf = self.performance_metrics
            perf.total_execution_time = 0.0
            perf.parallel_operations = 0
            perf.api_calls_made = 0
            perf.api_calls_cached = 0
            perf.cache_hits = 0
            perf.dependency_analysis_time = 0.0
            perf.error_count = 0

            # Multi-account only when there is genuinely more than one profile.
            if account_profiles and len(account_profiles) > 1:
                candidates = self._analyze_multi_account_vpcs_optimized(account_profiles, vpc_ids)
            else:
                candidates = self._analyze_single_account_vpcs_optimized(vpc_ids)

            # Update final performance metrics
            perf.total_execution_time = time.time() - start_time
            perf.total_vpcs_analyzed = len(candidates)

            if candidates:
                perf.average_vpc_analysis_time = perf.total_execution_time / len(candidates)

            # Enhanced performance target validation
            self._validate_performance_targets(metrics)

            # Display comprehensive performance summary
            self._display_enhanced_performance_summary()

            # Log DORA metrics for compliance
            self._log_dora_metrics(start_time, len(candidates), True)

            return candidates

        except Exception as e:
            self.performance_metrics.error_count += 1

            error_context = ErrorContext(
                module_name="vpc",
                operation="vpc_cleanup_analysis",
                aws_profile=self.profile,
                aws_region=self.region,
                performance_context={
                    "execution_time": time.time() - start_time,
                    "vpcs_attempted": len(vpc_ids) if vpc_ids else "all",
                    "enable_parallel": self.enable_parallel_processing,
                    "parallel_workers": self.max_workers,
                    "caching_enabled": self.enable_caching
                }
            )

            enhanced_error = self.exception_handler.handle_exception(e, error_context)

            # Log failed DORA metrics
            self._log_dora_metrics(start_time, 0, False, str(e))

            # Fall back to the degraded analysis only when the handler says a
            # retry is possible; otherwise surface the original exception.
            if enhanced_error.retry_possible:
                self.console.print("[yellow]🔄 Attempting graceful degradation with performance optimization...[/yellow]")
                return self._enhanced_fallback_analysis(vpc_ids, account_profiles)

            raise
|
361
|
+
|
362
|
+
def _analyze_single_account_vpcs_optimized(self, vpc_ids: Optional[List[str]]) -> List[VPCCleanupCandidate]:
    """Analyze the VPCs of the current session's account, using the cache and worker pool.

    Returns an empty list when there is no session, when no VPCs are found, or
    when any error occurs (the error is printed and logged, not raised).
    """
    candidates = []

    if not self.session:
        self.console.print("[red]❌ No AWS session available[/red]")
        return candidates

    try:
        ec2_client = self.session.client('ec2', region_name=self.region)

        if vpc_ids:
            # Split the requested ids into cache hits and ids that need an API call.
            cached_vpcs = []
            uncached_vpc_ids = []

            if not self.analysis_cache:
                uncached_vpc_ids = vpc_ids
            else:
                for requested_id in vpc_ids:
                    hit = self.analysis_cache.get_vpc_data(requested_id)
                    if not hit:
                        uncached_vpc_ids.append(requested_id)
                        continue
                    cached_vpcs.append(hit)
                    self.performance_metrics.cache_hits += 1
                    self.performance_metrics.api_calls_cached += 1

            # One describe call covers every id that missed the cache.
            new_vpcs = []
            if uncached_vpc_ids:
                described = ec2_client.describe_vpcs(VpcIds=uncached_vpc_ids)
                new_vpcs = described.get('Vpcs', [])
                self.performance_metrics.api_calls_made += 1

                if self.analysis_cache:
                    for fetched in new_vpcs:
                        self.analysis_cache.cache_vpc_data(fetched['VpcId'], fetched)

            vpc_list = cached_vpcs + new_vpcs
        else:
            # No filter: describe everything and cache it all.
            described = ec2_client.describe_vpcs()
            vpc_list = described.get('Vpcs', [])
            self.performance_metrics.api_calls_made += 1

            if self.analysis_cache:
                for fetched in vpc_list:
                    self.analysis_cache.cache_vpc_data(fetched['VpcId'], fetched)

        if not vpc_list:
            self.console.print("[yellow]⚠️ No VPCs found for analysis[/yellow]")
            return candidates

        progress_columns = (
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            BarColumn(),
            TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
            TimeRemainingColumn(),
        )
        with Progress(*progress_columns, console=self.console) as progress:
            task = progress.add_task("Analyzing VPCs with optimization...", total=len(vpc_list))

            # A single VPC gains nothing from the pool, so it goes sequentially.
            use_pool = self.enable_parallel_processing and len(vpc_list) > 1
            if use_pool:
                candidates = self._parallel_vpc_analysis(vpc_list, ec2_client, progress, task)
                self.performance_metrics.parallel_operations += 1
            else:
                candidates = self._sequential_vpc_analysis(vpc_list, ec2_client, progress, task)

        self.cleanup_candidates = candidates
        return candidates

    except Exception as e:
        self.performance_metrics.error_count += 1
        self.console.print(f"[red]❌ Error analyzing VPCs: {e}[/red]")
        logger.error(f"VPC analysis failed: {e}")
        return candidates
|
447
|
+
|
448
|
+
def _parallel_vpc_analysis(self, vpc_list: List[Dict], ec2_client, progress, task) -> List[VPCCleanupCandidate]:
    """Analyze VPCs concurrently on ``self.executor``, in batches of at most ``max_workers``.

    Args:
        vpc_list: VPC records (dicts carrying 'VpcId')
        ec2_client: EC2 client handed to each per-VPC analysis
        progress: progress object; ``advance(task)`` is called once per VPC
        task: task id understood by *progress*

    Returns:
        Candidates for every VPC whose analysis finished with a truthy result.
        Failed or timed-out VPCs are logged, counted in ``error_count`` and skipped.
    """
    candidates = []

    # An empty list would otherwise produce a batch size of 0 and make
    # range() raise ValueError.
    if not vpc_list:
        return candidates

    batch_size = max(1, min(self.max_workers, len(vpc_list)))

    for start in range(0, len(vpc_list), batch_size):
        batch = vpc_list[start:start + batch_size]

        # Remember which VPC each future belongs to so failures can be named.
        future_to_vpc_id = {
            self.executor.submit(self._analyze_single_vpc_with_circuit_breaker, vpc, ec2_client):
                vpc.get('VpcId', 'unknown')
            for vpc in batch
        }
        unfinished = set(future_to_vpc_id)

        try:
            for future in concurrent.futures.as_completed(future_to_vpc_id, timeout=60):
                unfinished.discard(future)
                try:
                    candidate = future.result()
                    if candidate:
                        candidates.append(candidate)
                except Exception as e:
                    self.performance_metrics.error_count += 1
                    logger.error(f"Failed to analyze VPC in parallel: {e}")
                progress.advance(task)
        except concurrent.futures.TimeoutError:
            # as_completed raises from the iterator itself when the batch
            # deadline passes. Handle it here so results already collected
            # (and the remaining batches) are not thrown away.
            for future in unfinished:
                # cancel() only stops work that has not started; a running
                # analysis keeps going in the background and is ignored.
                future.cancel()
                self.performance_metrics.error_count += 1
                logger.error(f"Timed out analyzing VPC {future_to_vpc_id[future]} in parallel")
                progress.advance(task)

    return candidates
|
477
|
+
|
478
|
+
def _sequential_vpc_analysis(self, vpc_list: List[Dict], ec2_client, progress, task) -> List[VPCCleanupCandidate]:
    """Analyze VPCs one after another, updating *progress* for each.

    A failure on one VPC is logged and counted in ``error_count``; the loop
    then carries on with the next VPC.
    """
    analyzed = []

    for vpc_record in vpc_list:
        vpc_id = vpc_record['VpcId']
        progress.update(task, description=f"Analyzing {vpc_id}...")

        try:
            outcome = self._analyze_single_vpc_with_circuit_breaker(vpc_record, ec2_client)
            # A falsy outcome (e.g. None) is simply not collected.
            if outcome:
                analyzed.append(outcome)

        except Exception as e:
            self.performance_metrics.error_count += 1
            logger.error(f"Failed to analyze VPC {vpc_id}: {e}")

        # Advance whether the VPC succeeded, was skipped, or failed.
        progress.advance(task)

    return analyzed
|
498
|
+
|
499
|
+
def _analyze_single_vpc_with_circuit_breaker(self, vpc: Dict, ec2_client) -> Optional[VPCCleanupCandidate]:
    """Run the full per-VPC analysis behind that VPC's circuit breaker.

    Returns:
        The analysed candidate, or None when the breaker refuses the request.

    Raises:
        Exception: any analysis error is recorded on the breaker, logged and re-raised.
    """
    vpc_id = vpc['VpcId']
    # Breakers are keyed per VPC; the lookup creates one on first use.
    breaker = self.circuit_breakers[f"vpc_analysis_{vpc_id}"]

    allowed = breaker.should_allow_request()
    if not allowed:
        logger.warning(f"Circuit breaker open for VPC {vpc_id}, skipping analysis")
        return None

    try:
        # Build the candidate, then enrich it in place step by step.
        result = self._create_vpc_candidate(vpc, ec2_client)
        self._analyze_vpc_dependencies_optimized(result, ec2_client)
        self._assess_cleanup_risk(result)
        self._calculate_financial_impact(result)

        breaker.record_success()
        return result

    except Exception as e:
        breaker.record_failure()
        logger.error(f"VPC analysis failed for {vpc_id}: {e}")
        raise
|
530
|
+
|
531
|
+
def _analyze_vpc_dependencies_optimized(self, candidate: VPCCleanupCandidate, ec2_client) -> None:
    """
    Collect all dependencies of ``candidate.vpc_id`` and store them on the candidate.

    Sets ``candidate.dependencies``, ``candidate.blocking_dependencies`` and
    ``candidate.eni_count`` (blocking 'NetworkInterface' dependencies). Results
    come from the dependency cache when a valid non-empty entry exists;
    otherwise the fourteen ``_analyze_*`` helpers are run, concurrently when
    parallel processing is enabled.

    A failing helper is logged, counted in ``error_count`` and skipped, so the
    remaining dependency types are still reported. Incomplete results are not
    written to the cache.

    Args:
        candidate: candidate to enrich in place
        ec2_client: EC2 client passed to the EC2-based helpers
    """
    vpc_id = candidate.vpc_id

    def apply_dependencies(deps) -> None:
        # Keep the derived counters in step with the list on every path,
        # including cache hits, so blocking resources are never under-reported.
        candidate.dependencies = deps
        candidate.blocking_dependencies = sum(1 for dep in deps if dep.blocking)
        candidate.eni_count = sum(
            1 for dep in deps
            if dep.resource_type == 'NetworkInterface' and dep.blocking
        )

    # Check cache first
    if self.analysis_cache and self.analysis_cache.dependency_cache.get(vpc_id):
        if self.analysis_cache.is_valid(vpc_id):
            apply_dependencies(self.analysis_cache.dependency_cache[vpc_id])
            self.performance_metrics.cache_hits += 1
            return

    dependency_start_time = time.time()
    dependencies = []
    had_failures = False

    try:
        # (label, helper, positional args) for every dependency type.
        ec2_args = (vpc_id, ec2_client)
        analyzers = [
            ('nat_gateways', self._analyze_nat_gateways, ec2_args),
            ('vpc_endpoints', self._analyze_vpc_endpoints, ec2_args),
            ('route_tables', self._analyze_route_tables, ec2_args),
            ('security_groups', self._analyze_security_groups, ec2_args),
            ('network_acls', self._analyze_network_acls, ec2_args),
            ('vpc_peering', self._analyze_vpc_peering, ec2_args),
            ('tgw_attachments', self._analyze_transit_gateway_attachments, ec2_args),
            ('internet_gateways', self._analyze_internet_gateways, ec2_args),
            ('vpn_gateways', self._analyze_vpn_gateways, ec2_args),
            ('elastic_ips', self._analyze_elastic_ips, ec2_args),
            ('load_balancers', self._analyze_load_balancers, ec2_args),
            ('network_interfaces', self._analyze_network_interfaces, ec2_args),
            ('rds_subnet_groups', self._analyze_rds_subnet_groups, (vpc_id,)),
            ('elasticache_subnet_groups', self._analyze_elasticache_subnet_groups, (vpc_id,)),
        ]

        if self.enable_parallel_processing and self.executor:
            # Use a dedicated short-lived pool instead of self.executor. This
            # method itself runs on self.executor workers (it is reached from
            # tasks submitted by _parallel_vpc_analysis); submitting the
            # fan-out to that same pool and blocking on the results can leave
            # every worker waiting on tasks that have no free worker to run on.
            pool_size = max(1, min(self.max_workers, len(analyzers)))
            pool = concurrent.futures.ThreadPoolExecutor(max_workers=pool_size)
            try:
                submitted = [
                    (dep_type, pool.submit(analyze, *args))
                    for dep_type, analyze, args in analyzers
                ]
                for dep_type, future in submitted:
                    try:
                        # 30 second timeout per dependency type
                        dependencies.extend(future.result(timeout=30))
                    except Exception as e:
                        had_failures = True
                        logger.warning(f"Failed to analyze {dep_type} for VPC {vpc_id}: {e}")
                        self.performance_metrics.error_count += 1
            finally:
                # Do not block on helpers that already exceeded their timeout.
                pool.shutdown(wait=False)
        else:
            # Sequential analysis (fallback) with the same per-type error
            # handling as the parallel path.
            for dep_type, analyze, args in analyzers:
                try:
                    dependencies.extend(analyze(*args))
                except Exception as e:
                    had_failures = True
                    logger.warning(f"Failed to analyze {dep_type} for VPC {vpc_id}: {e}")
                    self.performance_metrics.error_count += 1

        apply_dependencies(dependencies)

        # Cache only complete results; a partial list served later from the
        # cache would look like the full dependency set.
        if self.analysis_cache and not had_failures:
            self.analysis_cache.dependency_cache[vpc_id] = dependencies
            self.analysis_cache.last_updated[vpc_id] = time.time()

        # Update performance metrics
        dependency_analysis_time = time.time() - dependency_start_time
        self.performance_metrics.dependency_analysis_time += dependency_analysis_time

    except Exception as e:
        logger.error(f"Failed to analyze dependencies for VPC {vpc_id}: {e}")
        candidate.dependencies = []
|
611
|
+
|
612
|
+
def _analyze_single_account_vpcs(self, vpc_ids: Optional[List[str]]) -> List[VPCCleanupCandidate]:
    """Analyze the current session's VPCs sequentially, without cache or worker pool.

    Returns the candidates collected so far when an error interrupts the run
    (the error is printed and logged, not raised), and an empty list when
    there is no session.
    """
    candidates = []

    if not self.session:
        self.console.print("[red]❌ No AWS session available[/red]")
        return candidates

    try:
        ec2_client = self.session.client('ec2', region_name=self.region)

        # Filter by id only when ids were supplied.
        describe_kwargs = {'VpcIds': vpc_ids} if vpc_ids else {}
        vpc_list = ec2_client.describe_vpcs(**describe_kwargs).get('Vpcs', [])

        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            console=self.console,
        ) as progress:

            task = progress.add_task("Analyzing VPCs...", total=len(vpc_list))

            for vpc_record in vpc_list:
                current_id = vpc_record['VpcId']
                progress.update(task, description=f"Analyzing {current_id}...")

                # Build the candidate, then enrich it in place:
                # dependencies -> risk -> financial impact.
                entry = self._create_vpc_candidate(vpc_record, ec2_client)
                self._analyze_vpc_dependencies(entry, ec2_client)
                self._assess_cleanup_risk(entry)
                self._calculate_financial_impact(entry)

                candidates.append(entry)
                progress.advance(task)

        self.cleanup_candidates = candidates
        return candidates

    except Exception as e:
        self.console.print(f"[red]❌ Error analyzing VPCs: {e}[/red]")
        logger.error(f"VPC analysis failed: {e}")
        return candidates
|
665
|
+
|
666
|
+
def _analyze_multi_account_vpcs(
    self,
    account_profiles: List[str],
    vpc_ids: Optional[List[str]]
) -> List[VPCCleanupCandidate]:
    """Analyze VPCs in each of *account_profiles* and merge the results.

    For every profile a session is created and temporarily installed as
    ``self.session`` while ``_analyze_single_account_vpcs`` runs. A failing
    account is reported and skipped; the others are still analysed.

    Args:
        account_profiles: AWS profile names, one per account
        vpc_ids: Optional VPC ids passed through to the per-account analysis

    Returns:
        Candidates from all accounts that could be analysed, each stamped with
        its account id. The same list is stored in ``self.cleanup_candidates``.
    """
    all_candidates = []

    self.console.print(f"[cyan]🌐 Multi-account analysis across {len(account_profiles)} accounts[/cyan]")

    for profile in account_profiles:
        original_session = self.session
        try:
            # Create session for this account
            account_session = create_operational_session(profile=profile)

            # Temporarily swap the session; the per-account analysis reads self.session.
            self.session = account_session

            # Get account ID for tracking
            sts_client = account_session.client('sts')
            account_id = sts_client.get_caller_identity()['Account']

            self.console.print(f"[blue]📋 Analyzing account: {account_id} (profile: {profile})[/blue]")

            # Analyze VPCs in this account
            account_candidates = self._analyze_single_account_vpcs(vpc_ids)

            # Update account ID for all candidates
            for candidate in account_candidates:
                candidate.account_id = account_id

            all_candidates.extend(account_candidates)

        except Exception as e:
            self.console.print(f"[red]❌ Error analyzing account {profile}: {e}[/red]")
            logger.error(f"Multi-account analysis failed for {profile}: {e}")

        finally:
            # Always put the original session back, including after a failure,
            # so the framework is never left bound to another account.
            self.session = original_session

    self.cleanup_candidates = all_candidates
    return all_candidates
|
710
|
+
|
711
|
+
def _create_vpc_candidate(self, vpc: Dict, ec2_client) -> VPCCleanupCandidate:
    """Build a VPCCleanupCandidate from one VPC record.

    Args:
        vpc: VPC record; 'VpcId' is required, 'Tags', 'IsDefault' and
            'CidrBlock' are optional
        ec2_client: EC2 client used for the flow-log check

    Returns:
        A candidate with identity, tags, flow-log and IaC fields filled in;
        dependency, risk and cost fields keep their defaults.
    """
    vpc_id = vpc['VpcId']

    # Flatten the tag list into a dict; the 'Name' tag doubles as the VPC name.
    tags = {entry['Key']: entry['Value'] for entry in vpc.get('Tags', [])}
    vpc_name = tags.get('Name')

    # Resolve the account id via STS; fall back to "unknown" without a
    # session or when the lookup fails.
    account_id = "unknown"
    if self.session:
        try:
            identity = self.session.client('sts').get_caller_identity()
            account_id = identity['Account']
        except Exception as e:
            logger.warning(f"Failed to get account ID: {e}")

    flow_logs_enabled = self._check_flow_logs(vpc_id, ec2_client)
    iac_managed, iac_source = self._detect_iac_management(tags)

    return VPCCleanupCandidate(
        account_id=account_id,
        vpc_id=vpc_id,
        vpc_name=vpc_name,
        cidr_block=vpc.get('CidrBlock', ''),
        is_default=vpc.get('IsDefault', False),
        region=self.region,
        tags=tags,
        flow_logs_enabled=flow_logs_enabled,
        iac_managed=iac_managed,
        iac_source=iac_source
    )
|
753
|
+
|
754
|
+
def _analyze_vpc_dependencies(self, candidate: VPCCleanupCandidate, ec2_client) -> None:
    """
    Run every dependency analyzer for the candidate's VPC and store the result.

    The analyzers are invoked in three-bucket order:
    1. Internal data plane (NAT, endpoints, route tables, security groups, NACLs)
    2. External interconnects (peering, TGW, IGW, VPN gateways)
    3. Control plane (Elastic IPs, load balancers, network interfaces)
    followed by the RDS and ElastiCache subnet-group checks.

    On success the candidate's ``dependencies``, ``blocking_dependencies`` and
    ``eni_count`` are updated. If anything raises, the error is logged and
    only ``dependencies`` is reset to an empty list.

    Args:
        candidate: Candidate to annotate in place
        ec2_client: EC2 client passed to the EC2-based analyzers
    """
    vpc_id = candidate.vpc_id
    found = []

    try:
        # 1. Internal data plane dependencies (bucket 1)
        found += self._analyze_nat_gateways(vpc_id, ec2_client)
        found += self._analyze_vpc_endpoints(vpc_id, ec2_client)
        found += self._analyze_route_tables(vpc_id, ec2_client)
        found += self._analyze_security_groups(vpc_id, ec2_client)
        found += self._analyze_network_acls(vpc_id, ec2_client)

        # 2. External interconnects (bucket 2)
        found += self._analyze_vpc_peering(vpc_id, ec2_client)
        found += self._analyze_transit_gateway_attachments(vpc_id, ec2_client)
        found += self._analyze_internet_gateways(vpc_id, ec2_client)
        found += self._analyze_vpn_gateways(vpc_id, ec2_client)

        # 3. Control plane dependencies (bucket 3)
        found += self._analyze_elastic_ips(vpc_id, ec2_client)
        found += self._analyze_load_balancers(vpc_id, ec2_client)
        found += self._analyze_network_interfaces(vpc_id, ec2_client)

        # Additional service dependencies
        found += self._analyze_rds_subnet_groups(vpc_id)
        found += self._analyze_elasticache_subnet_groups(vpc_id)

        candidate.dependencies = found
        candidate.blocking_dependencies = len([dep for dep in found if dep.blocking])
        # ENI count feeds the ENI gate: only blocking NetworkInterface entries count
        candidate.eni_count = sum(
            1 for dep in found
            if dep.resource_type == 'NetworkInterface' and dep.blocking
        )

    except Exception as e:
        logger.error(f"Failed to analyze dependencies for VPC {vpc_id}: {e}")
        candidate.dependencies = []
|
797
|
+
|
798
|
+
def _analyze_nat_gateways(self, vpc_id: str, ec2_client) -> List[VPCDependency]:
    """
    Collect NAT Gateway dependencies of a VPC.

    Args:
        vpc_id: VPC whose NAT Gateways are listed
        ec2_client: EC2 client used for describe_nat_gateways

    Returns:
        One blocking VPCDependency per NAT Gateway whose state is neither
        'deleted' nor 'deleting'; entries gathered so far if the lookup fails
    """
    found = []

    try:
        vpc_filter = {'Name': 'vpc-id', 'Values': [vpc_id]}
        result = ec2_client.describe_nat_gateways(Filters=[vpc_filter])

        for gateway in result.get('NatGateways', []):
            # Skip gateways reported as deleted or in the process of deletion
            if gateway['State'] in ['deleted', 'deleting']:
                continue
            found.append(VPCDependency(
                resource_type='NatGateway',
                resource_id=gateway['NatGatewayId'],
                resource_name=None,
                dependency_level=1,  # Internal data plane
                blocking=True,
                deletion_order=1,
                api_method='delete_nat_gateway',
                description='NAT Gateway must be deleted before VPC'
            ))
    except Exception as e:
        logger.warning(f"Failed to analyze NAT Gateways for VPC {vpc_id}: {e}")

    return found
|
823
|
+
|
824
|
+
def _analyze_vpc_endpoints(self, vpc_id: str, ec2_client) -> List[VPCDependency]:
    """
    Analyze VPC Endpoint dependencies

    The endpoint type reported by EC2 (``VpcEndpointType``) is prefixed to the
    dependency description. The cost estimation in this class looks for the
    word 'Interface' in the description of VpcEndpoint dependencies; without
    the type in the text that check could never match.

    Args:
        vpc_id: VPC whose endpoints are listed
        ec2_client: EC2 client used for describe_vpc_endpoints

    Returns:
        One blocking VPCDependency per endpoint that is not deleted or being
        deleted; entries gathered so far if the lookup fails
    """
    dependencies = []

    try:
        response = ec2_client.describe_vpc_endpoints(
            Filters=[{'Name': 'vpc-id', 'Values': [vpc_id]}]
        )

        for endpoint in response.get('VpcEndpoints', []):
            # Compare case-insensitively so 'Deleted' and 'deleted' are treated alike
            if str(endpoint['State']).lower() in ('deleted', 'deleting'):
                continue

            description = 'VPC Endpoint must be deleted before VPC'
            endpoint_type = endpoint.get('VpcEndpointType')
            if endpoint_type:
                # e.g. "Interface VPC Endpoint must be deleted before VPC"
                description = f'{endpoint_type} {description}'

            dependencies.append(VPCDependency(
                resource_type='VpcEndpoint',
                resource_id=endpoint['VpcEndpointId'],
                resource_name=endpoint.get('ServiceName', ''),
                dependency_level=1,  # Internal data plane
                blocking=True,
                deletion_order=2,
                api_method='delete_vpc_endpoint',
                description=description
            ))
    except Exception as e:
        logger.warning(f"Failed to analyze VPC Endpoints for VPC {vpc_id}: {e}")

    return dependencies
|
849
|
+
|
850
|
+
def _analyze_route_tables(self, vpc_id: str, ec2_client) -> List[VPCDependency]:
    """
    Collect non-main route tables of a VPC.

    Args:
        vpc_id: VPC whose route tables are listed
        ec2_client: EC2 client used for describe_route_tables

    Returns:
        One blocking VPCDependency per route table that has no association
        flagged 'Main'; entries gathered so far if the lookup fails
    """
    found = []

    try:
        vpc_filter = {'Name': 'vpc-id', 'Values': [vpc_id]}
        result = ec2_client.describe_route_tables(Filters=[vpc_filter])

        for table in result.get('RouteTables', []):
            # The main route table is skipped (deleted with VPC)
            if any(link.get('Main', False) for link in table.get('Associations', [])):
                continue
            found.append(VPCDependency(
                resource_type='RouteTable',
                resource_id=table['RouteTableId'],
                resource_name=None,
                dependency_level=1,  # Internal data plane
                blocking=True,
                deletion_order=10,  # Later in cleanup
                api_method='delete_route_table',
                description='Non-main route table must be deleted'
            ))
    except Exception as e:
        logger.warning(f"Failed to analyze Route Tables for VPC {vpc_id}: {e}")

    return found
|
878
|
+
|
879
|
+
def _analyze_security_groups(self, vpc_id: str, ec2_client) -> List[VPCDependency]:
    """
    Collect non-default security groups of a VPC.

    Args:
        vpc_id: VPC whose security groups are listed
        ec2_client: EC2 client used for describe_security_groups

    Returns:
        One blocking VPCDependency per group not named 'default'; entries
        gathered so far if the lookup fails
    """
    found = []

    try:
        vpc_filter = {'Name': 'vpc-id', 'Values': [vpc_id]}
        result = ec2_client.describe_security_groups(Filters=[vpc_filter])

        for group in result.get('SecurityGroups', []):
            group_name = group['GroupName']
            # The default security group is skipped (deleted with VPC)
            if group_name == 'default':
                continue
            found.append(VPCDependency(
                resource_type='SecurityGroup',
                resource_id=group['GroupId'],
                resource_name=group_name,
                dependency_level=1,  # Internal data plane
                blocking=True,
                deletion_order=11,  # Later in cleanup
                api_method='delete_security_group',
                description='Non-default security group must be deleted'
            ))
    except Exception as e:
        logger.warning(f"Failed to analyze Security Groups for VPC {vpc_id}: {e}")

    return found
|
905
|
+
|
906
|
+
def _analyze_network_acls(self, vpc_id: str, ec2_client) -> List[VPCDependency]:
    """
    Collect non-default Network ACLs of a VPC.

    Args:
        vpc_id: VPC whose Network ACLs are listed
        ec2_client: EC2 client used for describe_network_acls

    Returns:
        One blocking VPCDependency per ACL whose 'IsDefault' flag is unset or
        false; entries gathered so far if the lookup fails
    """
    found = []

    try:
        vpc_filter = {'Name': 'vpc-id', 'Values': [vpc_id]}
        result = ec2_client.describe_network_acls(Filters=[vpc_filter])

        for acl in result.get('NetworkAcls', []):
            # The default NACL is skipped (deleted with VPC)
            if acl.get('IsDefault', False):
                continue
            found.append(VPCDependency(
                resource_type='NetworkAcl',
                resource_id=acl['NetworkAclId'],
                resource_name=None,
                dependency_level=1,  # Internal data plane
                blocking=True,
                deletion_order=12,  # Later in cleanup
                api_method='delete_network_acl',
                description='Non-default Network ACL must be deleted'
            ))
    except Exception as e:
        logger.warning(f"Failed to analyze Network ACLs for VPC {vpc_id}: {e}")

    return found
|
932
|
+
|
933
|
+
def _analyze_vpc_peering(self, vpc_id: str, ec2_client) -> List[VPCDependency]:
    """
    Analyze VPC Peering dependencies

    The VPC can be either the requester or the accepter of a peering
    connection. EC2 combines multiple filters in one request with AND, so the
    two sides are queried separately and the results are merged by connection
    ID; a single request carrying both filters would only match a connection
    where the VPC is on both sides.

    Args:
        vpc_id: VPC whose peering connections are listed
        ec2_client: EC2 client used for describe_vpc_peering_connections

    Returns:
        One blocking VPCDependency per peering connection whose status is not
        'deleted', 'deleting' or 'rejected'; entries gathered so far if a
        lookup fails
    """
    dependencies = []
    seen_ids = set()

    try:
        for filter_name in ('requester-vpc-info.vpc-id', 'accepter-vpc-info.vpc-id'):
            response = ec2_client.describe_vpc_peering_connections(
                Filters=[{'Name': filter_name, 'Values': [vpc_id]}]
            )

            for peering in response.get('VpcPeeringConnections', []):
                peering_id = peering['VpcPeeringConnectionId']
                # Guard against reporting the same connection from both queries
                if peering_id in seen_ids:
                    continue
                seen_ids.add(peering_id)

                if peering['Status']['Code'] in ('deleted', 'deleting', 'rejected'):
                    continue

                dependencies.append(VPCDependency(
                    resource_type='VpcPeeringConnection',
                    resource_id=peering_id,
                    resource_name=None,
                    dependency_level=2,  # External interconnects
                    blocking=True,
                    deletion_order=5,
                    api_method='delete_vpc_peering_connection',
                    description='VPC Peering connection must be deleted first'
                ))
    except Exception as e:
        logger.warning(f"Failed to analyze VPC Peering for VPC {vpc_id}: {e}")

    return dependencies
|
961
|
+
|
962
|
+
def _analyze_transit_gateway_attachments(self, vpc_id: str, ec2_client) -> List[VPCDependency]:
    """
    Collect Transit Gateway attachments that reference a VPC.

    Args:
        vpc_id: VPC used as the attachment's resource ID
        ec2_client: EC2 client used for describe_transit_gateway_attachments

    Returns:
        One blocking VPCDependency per attachment whose state is neither
        'deleted' nor 'deleting'; entries gathered so far if the lookup fails
    """
    found = []

    try:
        # Both filters go in one request: attachments of type 'vpc' for this VPC
        attachment_filters = [
            {'Name': 'resource-id', 'Values': [vpc_id]},
            {'Name': 'resource-type', 'Values': ['vpc']}
        ]
        result = ec2_client.describe_transit_gateway_attachments(Filters=attachment_filters)

        for item in result.get('TransitGatewayAttachments', []):
            if item['State'] in ['deleted', 'deleting']:
                continue
            found.append(VPCDependency(
                resource_type='TransitGatewayAttachment',
                resource_id=item['TransitGatewayAttachmentId'],
                resource_name=item.get('TransitGatewayId', ''),
                dependency_level=2,  # External interconnects
                blocking=True,
                deletion_order=6,
                api_method='delete_transit_gateway_vpc_attachment',
                description='Transit Gateway attachment must be deleted'
            ))
    except Exception as e:
        logger.warning(f"Failed to analyze TGW attachments for VPC {vpc_id}: {e}")

    return found
|
990
|
+
|
991
|
+
def _analyze_internet_gateways(self, vpc_id: str, ec2_client) -> List[VPCDependency]:
    """
    Collect Internet Gateways attached to a VPC.

    Args:
        vpc_id: VPC used in the 'attachment.vpc-id' filter
        ec2_client: EC2 client used for describe_internet_gateways

    Returns:
        One blocking VPCDependency per gateway returned by the lookup;
        entries gathered so far if the lookup fails
    """
    found = []

    try:
        attachment_filter = {'Name': 'attachment.vpc-id', 'Values': [vpc_id]}
        result = ec2_client.describe_internet_gateways(Filters=[attachment_filter])

        for gateway in result.get('InternetGateways', []):
            found.append(VPCDependency(
                resource_type='InternetGateway',
                resource_id=gateway['InternetGatewayId'],
                resource_name=None,
                dependency_level=2,  # External interconnects
                blocking=True,
                deletion_order=7,  # Delete after internal components
                api_method='detach_internet_gateway',
                description='Internet Gateway must be detached and deleted'
            ))
    except Exception as e:
        logger.warning(f"Failed to analyze Internet Gateways for VPC {vpc_id}: {e}")

    return found
|
1015
|
+
|
1016
|
+
def _analyze_vpn_gateways(self, vpc_id: str, ec2_client) -> List[VPCDependency]:
    """
    Collect VPN Gateways attached to a VPC.

    Args:
        vpc_id: VPC used in the 'attachment.vpc-id' filter
        ec2_client: EC2 client used for describe_vpn_gateways

    Returns:
        One blocking VPCDependency per gateway whose state is neither
        'deleted' nor 'deleting'; entries gathered so far if the lookup fails
    """
    found = []

    try:
        attachment_filter = {'Name': 'attachment.vpc-id', 'Values': [vpc_id]}
        result = ec2_client.describe_vpn_gateways(Filters=[attachment_filter])

        for gateway in result.get('VpnGateways', []):
            if gateway['State'] in ['deleted', 'deleting']:
                continue
            found.append(VPCDependency(
                resource_type='VpnGateway',
                resource_id=gateway['VpnGatewayId'],
                resource_name=None,
                dependency_level=2,  # External interconnects
                blocking=True,
                deletion_order=6,
                api_method='detach_vpn_gateway',
                description='VPN Gateway must be detached'
            ))
    except Exception as e:
        logger.warning(f"Failed to analyze VPN Gateways for VPC {vpc_id}: {e}")

    return found
|
1041
|
+
|
1042
|
+
def _analyze_elastic_ips(self, vpc_id: str, ec2_client) -> List[VPCDependency]:
    """
    Collect Elastic IPs associated with network interfaces of a VPC.

    Only the interface-level 'Association' entry is inspected, and only
    associations that carry an 'AllocationId' are reported.

    Args:
        vpc_id: VPC whose network interfaces are listed
        ec2_client: EC2 client used for describe_network_interfaces

    Returns:
        One blocking VPCDependency per associated allocation; entries
        gathered so far if the lookup fails
    """
    found = []

    try:
        # Elastic IPs are discovered through the VPC's network interfaces
        vpc_filter = {'Name': 'vpc-id', 'Values': [vpc_id]}
        interfaces = ec2_client.describe_network_interfaces(Filters=[vpc_filter])

        for interface in interfaces.get('NetworkInterfaces', []):
            if 'Association' not in interface:
                continue
            allocation_id = interface['Association'].get('AllocationId')
            if not allocation_id:
                continue
            found.append(VPCDependency(
                resource_type='ElasticIp',
                resource_id=allocation_id,
                resource_name=interface['Association'].get('PublicIp', ''),
                dependency_level=3,  # Control plane
                blocking=True,
                deletion_order=8,
                api_method='disassociate_address',
                description='Elastic IP must be disassociated'
            ))
    except Exception as e:
        logger.warning(f"Failed to analyze Elastic IPs for VPC {vpc_id}: {e}")

    return found
|
1071
|
+
|
1072
|
+
def _analyze_load_balancers(self, vpc_id: str, ec2_client) -> List[VPCDependency]:
    """
    Analyze Load Balancer dependencies

    Lists the ELBv2 load balancers visible to the session in ``self.region``
    and keeps those whose ``VpcId`` matches. The listing is account-wide and
    returned in pages, so every page is read through the client's paginator;
    reading only the first response would miss load balancers on later pages.

    Args:
        vpc_id: VPC to match against each load balancer's 'VpcId'
        ec2_client: Unused; kept so all analyzers share the same call shape

    Returns:
        One blocking VPCDependency per matching load balancer; an empty list
        when no session is configured, or entries gathered so far on failure
    """
    dependencies = []

    try:
        # Use ELBv2 client for ALB/NLB
        # NOTE(review): Classic Load Balancers (the 'elb' API) are not
        # inspected here - confirm whether they need their own check.
        if self.session:
            elbv2_client = self.session.client('elbv2', region_name=self.region)
            paginator = elbv2_client.get_paginator('describe_load_balancers')

            for page in paginator.paginate():
                for lb in page.get('LoadBalancers', []):
                    if lb.get('VpcId') != vpc_id:
                        continue
                    dependencies.append(VPCDependency(
                        resource_type='LoadBalancer',
                        resource_id=lb['LoadBalancerArn'],
                        resource_name=lb['LoadBalancerName'],
                        dependency_level=3,  # Control plane
                        blocking=True,
                        deletion_order=3,
                        api_method='delete_load_balancer',
                        description='Load Balancer must be deleted before VPC'
                    ))
    except Exception as e:
        logger.warning(f"Failed to analyze Load Balancers for VPC {vpc_id}: {e}")

    return dependencies
|
1099
|
+
|
1100
|
+
def _analyze_network_interfaces(self, vpc_id: str, ec2_client) -> List[VPCDependency]:
    """
    Collect unattached network interfaces (ENIs) of a VPC.

    Only interfaces whose status is 'available' and that carry no
    'Attachment' are reported; every other interface is skipped.

    NOTE(review): interfaces that are in use are therefore not counted by the
    ENI gate that consumes these entries - confirm this is intended.

    Args:
        vpc_id: VPC whose network interfaces are listed
        ec2_client: EC2 client used for describe_network_interfaces

    Returns:
        One blocking VPCDependency per unattached interface; entries gathered
        so far if the lookup fails
    """
    found = []

    try:
        vpc_filter = {'Name': 'vpc-id', 'Values': [vpc_id]}
        result = ec2_client.describe_network_interfaces(Filters=[vpc_filter])

        for interface in result.get('NetworkInterfaces', []):
            # Anything not 'available', or still attached, is left out
            if interface.get('Status') != 'available' or interface.get('Attachment'):
                continue
            found.append(VPCDependency(
                resource_type='NetworkInterface',
                resource_id=interface['NetworkInterfaceId'],
                resource_name=interface.get('Description', ''),
                dependency_level=3,  # Control plane
                blocking=True,  # ENIs prevent VPC deletion
                deletion_order=9,
                api_method='delete_network_interface',
                description='Unattached network interface must be deleted'
            ))
    except Exception as e:
        logger.warning(f"Failed to analyze Network Interfaces for VPC {vpc_id}: {e}")

    return found
|
1126
|
+
|
1127
|
+
def _analyze_rds_subnet_groups(self, vpc_id: str) -> List[VPCDependency]:
    """
    Analyze RDS subnet group dependencies

    A DB subnet group is reported only when its ``VpcId`` equals ``vpc_id``.
    Matching on the availability-zone name of its subnets is not enough: every
    subnet group in the region has subnets whose AZ starts with the region
    name, so that test would attach every group to every VPC analyzed.

    The listing is account-wide and returned in pages, so all pages are read
    through the client's paginator.

    Args:
        vpc_id: VPC to match against each subnet group's 'VpcId'

    Returns:
        One blocking VPCDependency per matching DB subnet group; an empty
        list when no session is configured, or entries gathered so far on
        failure
    """
    dependencies = []

    try:
        if self.session:
            rds_client = self.session.client('rds', region_name=self.region)
            paginator = rds_client.get_paginator('describe_db_subnet_groups')

            for page in paginator.paginate():
                for subnet_group in page.get('DBSubnetGroups', []):
                    if subnet_group.get('VpcId') != vpc_id:
                        continue
                    dependencies.append(VPCDependency(
                        resource_type='DBSubnetGroup',
                        resource_id=subnet_group['DBSubnetGroupName'],
                        resource_name=subnet_group.get('DBSubnetGroupDescription', ''),
                        dependency_level=3,  # Control plane
                        blocking=True,
                        deletion_order=4,
                        api_method='delete_db_subnet_group',
                        description='RDS subnet group must be deleted or modified'
                    ))
    except Exception as e:
        logger.warning(f"Failed to analyze RDS subnet groups for VPC {vpc_id}: {e}")

    return dependencies
|
1159
|
+
|
1160
|
+
def _analyze_elasticache_subnet_groups(self, vpc_id: str) -> List[VPCDependency]:
    """
    Analyze ElastiCache subnet group dependencies

    Lists the cache subnet groups visible to the session in ``self.region``
    and keeps those whose ``VpcId`` matches. The listing is account-wide and
    returned in pages, so all pages are read through the client's paginator;
    reading only the first response would miss groups on later pages.

    Args:
        vpc_id: VPC to match against each subnet group's 'VpcId'

    Returns:
        One blocking VPCDependency per matching cache subnet group; an empty
        list when no session is configured, or entries gathered so far on
        failure
    """
    dependencies = []

    try:
        if self.session:
            elasticache_client = self.session.client('elasticache', region_name=self.region)
            paginator = elasticache_client.get_paginator('describe_cache_subnet_groups')

            for page in paginator.paginate():
                for subnet_group in page.get('CacheSubnetGroups', []):
                    if subnet_group.get('VpcId') != vpc_id:
                        continue
                    dependencies.append(VPCDependency(
                        resource_type='CacheSubnetGroup',
                        resource_id=subnet_group['CacheSubnetGroupName'],
                        resource_name=subnet_group.get('CacheSubnetGroupDescription', ''),
                        dependency_level=3,  # Control plane
                        blocking=True,
                        deletion_order=4,
                        api_method='delete_cache_subnet_group',
                        description='ElastiCache subnet group must be deleted or modified'
                    ))
    except Exception as e:
        logger.warning(f"Failed to analyze ElastiCache subnet groups for VPC {vpc_id}: {e}")

    return dependencies
|
1187
|
+
|
1188
|
+
def _check_flow_logs(self, vpc_id: str, ec2_client) -> bool:
    """
    Check if VPC has flow logs enabled

    The lookup filters on ``resource-id`` only. ``resource-type`` is not among
    the documented DescribeFlowLogs filter names; sending it risks the request
    being rejected, which this method would then report as "no flow logs".
    The VPC ID alone already restricts the result to this VPC.

    Args:
        vpc_id: VPC whose flow logs are looked up
        ec2_client: EC2 client used for describe_flow_logs

    Returns:
        True when at least one returned flow log has FlowLogStatus 'ACTIVE';
        False otherwise, including when the lookup fails
    """
    try:
        response = ec2_client.describe_flow_logs(
            Filters=[{'Name': 'resource-id', 'Values': [vpc_id]}]
        )

        return any(
            flow_log.get('FlowLogStatus') == 'ACTIVE'
            for flow_log in response.get('FlowLogs', [])
        )

    except Exception as e:
        logger.warning(f"Failed to check flow logs for VPC {vpc_id}: {e}")
        return False
|
1208
|
+
|
1209
|
+
def _detect_iac_management(self, tags: Dict[str, str]) -> Tuple[bool, Optional[str]]:
    """
    Detect if VPC is managed by Infrastructure as Code

    CloudFormation is recognised by the 'aws:cloudformation:stack-name' tag.
    Terraform/Terragrunt is recognised, case-insensitively, when a tag key or
    value contains 'terraform' or 'terragrunt', or has 'tf' as a standalone
    word (e.g. 'tf_workspace', 'managed-by: tf'). A bare substring test for
    'tf' is deliberately avoided because it matches unrelated words such as
    'platform'.

    Args:
        tags: Tag mapping of the VPC (key -> value)

    Returns:
        (True, "<tool>: <detail>") for the first indicator found, otherwise
        (False, None)
    """
    import re  # local import: only this method needs it

    # Check CloudFormation tags
    if 'aws:cloudformation:stack-name' in tags:
        return True, f"CloudFormation: {tags['aws:cloudformation:stack-name']}"

    def looks_like_terraform(text: str) -> bool:
        lowered = text.casefold()
        if 'terraform' in lowered or 'terragrunt' in lowered:
            return True
        # 'tf' only counts as a whole word between non-alphanumeric separators
        return 'tf' in re.split(r'[^a-z0-9]+', lowered)

    # Check Terraform tags
    for key, value in tags.items():
        if looks_like_terraform(key) or looks_like_terraform(value):
            return True, f"Terraform: {key}={value}"

    return False, None
|
1227
|
+
|
1228
|
+
def _assess_cleanup_risk(self, candidate: VPCCleanupCandidate) -> None:
    """
    Derive risk level, cleanup phase, timeline and approval flag for a candidate.

    The tier is chosen from the number of blocking dependencies
    (0, up to 3, up to 7, more). IaC-managed candidates that would land in the
    IMMEDIATE phase are moved to GOVERNANCE. All results are written onto the
    candidate in place.

    Args:
        candidate: Candidate to annotate
    """
    blocking = candidate.blocking_dependencies

    # Pick (risk, phase, timeline) from the blocking-dependency count
    if blocking == 0:
        risk = VPCCleanupRisk.LOW
        phase = VPCCleanupPhase.IMMEDIATE
        timeline = "1 week" if candidate.is_default else "1-2 weeks"
    elif blocking <= 3:
        risk = VPCCleanupRisk.MEDIUM
        phase = VPCCleanupPhase.INVESTIGATION
        timeline = "3-4 weeks"
    elif blocking <= 7:
        risk = VPCCleanupRisk.HIGH
        phase = VPCCleanupPhase.GOVERNANCE
        timeline = "2-3 weeks"
    else:
        risk = VPCCleanupRisk.CRITICAL
        phase = VPCCleanupPhase.COMPLEX
        timeline = "6-8 weeks"

    candidate.risk_level = risk
    candidate.cleanup_phase = phase
    candidate.implementation_timeline = timeline

    # IaC-managed VPCs never stay in the immediate phase
    if candidate.iac_managed and candidate.cleanup_phase == VPCCleanupPhase.IMMEDIATE:
        candidate.cleanup_phase = VPCCleanupPhase.GOVERNANCE
        candidate.implementation_timeline = "2-3 weeks"

    # Approval is needed for high/critical risk, default VPCs and IaC-managed VPCs
    candidate.approval_required = (
        candidate.risk_level in (VPCCleanupRisk.HIGH, VPCCleanupRisk.CRITICAL)
        or candidate.is_default
        or candidate.iac_managed
    )
|
1265
|
+
|
1266
|
+
def _calculate_financial_impact(self, candidate: VPCCleanupCandidate) -> None:
    """
    Estimate the monthly cost and annual savings of a candidate's dependencies.

    Does nothing when no cost engine is configured. Otherwise each dependency
    contributes a fixed monthly estimate by resource type, and the candidate's
    ``monthly_cost`` and ``annual_savings`` (12x monthly) are set. Failures
    are logged and leave the candidate unchanged.

    Args:
        candidate: Candidate whose dependencies are priced
    """
    # Fixed monthly estimates in USD per resource
    flat_rates = {
        'NatGateway': 45.0,    # base NAT Gateway charge
        'LoadBalancer': 20.0,  # load balancer base charge
        'ElasticIp': 3.65,     # treated as an idle address
    }

    try:
        if not self.cost_engine:
            return

        total = 0.0
        for dep in candidate.dependencies:
            kind = dep.resource_type
            if kind == 'NatGateway':
                total += flat_rates[kind]
            elif kind == 'VpcEndpoint' and 'Interface' in dep.description:
                # Interface endpoint estimate (single AZ)
                total += 10.0
            elif kind in ('LoadBalancer', 'ElasticIp'):
                total += flat_rates[kind]

        candidate.monthly_cost = total
        candidate.annual_savings = total * 12

    except Exception as e:
        logger.warning(f"Failed to calculate costs for VPC {candidate.vpc_id}: {e}")
|
1294
|
+
|
1295
|
+
def generate_cleanup_plan(
    self,
    candidates: Optional[List[VPCCleanupCandidate]] = None
) -> Dict[str, Any]:
    """
    Build the phased VPC cleanup plan and store it on the instance.

    Args:
        candidates: VPC candidates to plan for; when empty or omitted, the
            candidates held in ``self.cleanup_candidates`` are used

    Returns:
        Plan dictionary with metadata, executive summary, per-phase details,
        risk assessment, implementation roadmap and business impact; an empty
        dictionary when there are no candidates at all
    """
    candidates = candidates or self.cleanup_candidates
    if not candidates:
        self.console.print("[red]❌ No VPC candidates available for cleanup planning[/red]")
        return {}

    self.console.print(Panel.fit("📋 Generating VPC Cleanup Plan", style="bold green"))

    # Sort candidates into one list per cleanup phase
    phases = {
        phase: []
        for phase in (
            VPCCleanupPhase.IMMEDIATE,
            VPCCleanupPhase.INVESTIGATION,
            VPCCleanupPhase.GOVERNANCE,
            VPCCleanupPhase.COMPLEX,
        )
    }
    for vpc in candidates:
        phases[vpc.cleanup_phase].append(vpc)

    # Portfolio-wide totals
    vpc_total = len(candidates)
    savings_total = sum(vpc.annual_savings for vpc in candidates)
    blocking_total = sum(vpc.blocking_dependencies for vpc in candidates)

    # Enhanced Three-Bucket Logic Implementation
    three_bucket_classification = self._apply_three_bucket_logic(candidates)

    immediate_count = len(phases[VPCCleanupPhase.IMMEDIATE])
    if vpc_total > 0:
        percentage_ready = immediate_count / vpc_total * 100
    else:
        percentage_ready = 0

    if savings_total > 50000:
        business_case = 'Excellent'
    elif savings_total > 10000:
        business_case = 'Good'
    else:
        business_case = 'Moderate'

    plan = {
        'metadata': {
            'generated_at': datetime.now().isoformat(),
            'total_vpcs_analyzed': vpc_total,
            'total_annual_savings': savings_total,
            'total_blocking_dependencies': blocking_total,
            'safety_mode_enabled': self.safety_mode,
            'three_bucket_classification': three_bucket_classification
        },
        'executive_summary': {
            'immediate_candidates': immediate_count,
            'investigation_required': len(phases[VPCCleanupPhase.INVESTIGATION]),
            'governance_approval_needed': len(phases[VPCCleanupPhase.GOVERNANCE]),
            'complex_migration_required': len(phases[VPCCleanupPhase.COMPLEX]),
            'percentage_ready': percentage_ready,
            'business_case_strength': business_case
        },
        'phases': {},
        'risk_assessment': self._generate_risk_assessment(candidates),
        'implementation_roadmap': self._generate_implementation_roadmap(phases),
        'business_impact': self._generate_business_impact(candidates)
    }

    # Detailed section for every phase that has at least one candidate
    for phase, members in phases.items():
        if not members:
            continue
        plan['phases'][phase.value] = {
            'candidate_count': len(members),
            'candidates': [self._serialize_candidate(vpc) for vpc in members],
            'total_savings': sum(vpc.annual_savings for vpc in members),
            'average_timeline': self._calculate_average_timeline(members),
            'risk_distribution': self._analyze_risk_distribution(members)
        }

    self.analysis_results = plan
    return plan
|
1372
|
+
|
1373
|
+
def _serialize_candidate(self, candidate: VPCCleanupCandidate) -> Dict[str, Any]:
    """
    Convert a candidate into a plain dictionary suitable for JSON output.

    Enum-valued fields (risk level, cleanup phase) are emitted via ``.value``
    and the dependency list is reduced to counts per dependency level.

    Args:
        candidate: Candidate to serialize

    Returns:
        Dictionary of scalar fields plus a 'dependency_summary' section
    """
    deps = candidate.dependencies

    def count_at(level):
        # Number of dependencies recorded at the given dependency level
        return sum(1 for dep in deps if dep.dependency_level == level)

    dependency_summary = {
        'total_dependencies': len(deps),
        'blocking_dependencies': candidate.blocking_dependencies,
        'by_level': {
            'internal_data_plane': count_at(1),
            'external_interconnects': count_at(2),
            'control_plane': count_at(3)
        }
    }

    return {
        'account_id': candidate.account_id,
        'vpc_id': candidate.vpc_id,
        'vpc_name': candidate.vpc_name,
        'cidr_block': candidate.cidr_block,
        'is_default': candidate.is_default,
        'region': candidate.region,
        'blocking_dependencies': candidate.blocking_dependencies,
        'risk_level': candidate.risk_level.value,
        'cleanup_phase': candidate.cleanup_phase.value,
        'monthly_cost': candidate.monthly_cost,
        'annual_savings': candidate.annual_savings,
        'iac_managed': candidate.iac_managed,
        'iac_source': candidate.iac_source,
        'approval_required': candidate.approval_required,
        'implementation_timeline': candidate.implementation_timeline,
        'dependency_summary': dependency_summary
    }
|
1401
|
+
|
1402
|
+
def _apply_three_bucket_logic(self, candidates: List[VPCCleanupCandidate]) -> Dict[str, Any]:
    """
    Sort candidates into the three cleanup buckets and summarize each bucket.

    Bucket rules, evaluated in order:
    1. safe_cleanup: no ENIs, no blocking dependencies, no level >= 2
       dependencies and not IaC managed
    2. analysis_required: at most 3 blocking dependencies, no level >= 2
       dependencies and LOW or MEDIUM risk
    3. complex_approval_required: everything else

    Each candidate's ``bucket_classification`` attribute is set as a side effect.

    Args:
        candidates: Candidates to classify

    Returns:
        Dict with classification metadata, one summary per bucket and the
        safety-gate descriptors
    """
    buckets = {
        "safe_cleanup": [],               # Bucket 1
        "analysis_required": [],          # Bucket 2
        "complex_approval_required": [],  # Bucket 3
    }

    for vpc in candidates:
        blocking = vpc.blocking_dependencies
        # Any dependency at level 2 or above counts as external
        external = bool(vpc.dependencies) and any(
            dep.dependency_level >= 2 for dep in vpc.dependencies
        )

        if (vpc.eni_count == 0 and blocking == 0
                and not external and not vpc.iac_managed):
            label = "safe_cleanup"
        elif (blocking <= 3 and not external
                and vpc.risk_level in [VPCCleanupRisk.LOW, VPCCleanupRisk.MEDIUM]):
            label = "analysis_required"
        else:
            label = "complex_approval_required"

        buckets[label].append(vpc)
        vpc.bucket_classification = label

    total = len(candidates)

    def summarize(members, criteria):
        # Count, share of all candidates, IDs and savings for one bucket
        share = (len(members) / total * 100) if total > 0 else 0
        return {
            'count': len(members),
            'percentage': round(share, 1),
            'vpc_ids': [vpc.vpc_id for vpc in members],
            'total_savings': sum(vpc.annual_savings for vpc in members),
            'criteria': criteria
        }

    return {
        'classification_metadata': {
            'total_vpcs_classified': total,
            'eni_gate_validation': 'enforced',
            'dependency_analysis': 'comprehensive',
            'safety_first_approach': True
        },
        'bucket_1_safe_cleanup': summarize(
            buckets["safe_cleanup"],
            'Zero ENIs, no dependencies, no IaC (default/non-default both allowed)'
        ),
        'bucket_2_analysis_required': summarize(
            buckets["analysis_required"],
            'Limited dependencies, low-medium risk, analysis needed'
        ),
        'bucket_3_complex_approval': summarize(
            buckets["complex_approval_required"],
            'Multiple dependencies, IaC managed, or high risk'
        ),
        'safety_gates': {
            'eni_gate_enforced': True,
            'dependency_validation': 'multi_level',
            'iac_detection': 'cloudformation_terraform',
            'default_vpc_protection': True,
            'approval_workflows': 'required_for_bucket_3'
        }
    }
|
1494
|
+
|
1495
|
+
def _generate_risk_assessment(self, candidates: List[VPCCleanupCandidate]) -> Dict[str, Any]:
    """Summarise how the candidates are spread across risk levels.

    Returns a dict with three entries:
      * ``risk_distribution``: candidate count per ``VPCCleanupRisk`` member,
        keyed by the member's ``value``.
      * ``overall_risk``: 'Low', 'Medium' or 'High', derived from the counts
        stored under the 'Critical' and 'High' keys (a missing key counts as 0).
      * ``mitigation_strategies``: a fixed list of recommendations.
    """
    distribution = {
        level.value: sum(1 for item in candidates if item.risk_level == level)
        for level in VPCCleanupRisk
    }

    # NOTE(review): assumes the enum values are spelled 'Critical' / 'High';
    # confirm against VPCCleanupRisk, otherwise both lookups fall back to 0.
    critical_total = distribution.get('Critical', 0)
    if critical_total == 0 and distribution.get('High', 0) <= 2:
        overall = 'Low'
    elif critical_total <= 1:
        overall = 'Medium'
    else:
        overall = 'High'

    strategies = [
        'Phased implementation starting with lowest risk VPCs',
        'Comprehensive dependency validation before deletion',
        'Enterprise approval workflows for high-risk deletions',
        'Complete rollback procedures documented',
        'READ-ONLY analysis mode with explicit approval gates',
    ]

    return {
        'risk_distribution': distribution,
        'overall_risk': overall,
        'mitigation_strategies': strategies,
    }
|
1512
|
+
|
1513
|
+
def _generate_implementation_roadmap(self, phases: Dict[VPCCleanupPhase, List[VPCCleanupCandidate]]) -> Dict[str, Any]:
    """Build a roadmap with one ``Phase_<n>`` entry per non-empty cleanup phase.

    Phases are visited in the fixed order IMMEDIATE, INVESTIGATION,
    GOVERNANCE, COMPLEX. The number in the key is the phase's position in
    that order, so keys are not renumbered when an earlier phase is empty.
    """
    ordered_phases = (
        VPCCleanupPhase.IMMEDIATE,
        VPCCleanupPhase.INVESTIGATION,
        VPCCleanupPhase.GOVERNANCE,
        VPCCleanupPhase.COMPLEX,
    )

    plan = {}
    for position, stage in enumerate(ordered_phases, start=1):
        members = phases.get(stage, [])
        if not members:
            # Phases without candidates are left out of the roadmap entirely.
            continue

        plan[f'Phase_{position}'] = {
            'name': stage.value,
            'duration': self._calculate_average_timeline(members),
            'vpc_count': len(members),
            'savings_potential': sum(item.annual_savings for item in members),
            'key_activities': self._get_phase_activities(stage),
            'success_criteria': self._get_phase_success_criteria(stage),
            'stakeholders': self._get_phase_stakeholders(stage),
        }

    return plan
|
1538
|
+
|
1539
|
+
def _generate_business_impact(self, candidates: List[VPCCleanupCandidate]) -> Dict[str, Any]:
|
1540
|
+
"""Generate business impact analysis"""
|
1541
|
+
default_vpc_count = len([c for c in candidates if c.is_default])
|
1542
|
+
|
1543
|
+
return {
|
1544
|
+
'security_improvement': {
|
1545
|
+
'default_vpcs_eliminated': default_vpc_count,
|
1546
|
+
'attack_surface_reduction': f"{(len([c for c in candidates if c.blocking_dependencies == 0]) / len(candidates) * 100):.1f}%" if candidates else "0%",
|
1547
|
+
'compliance_benefit': 'CIS Benchmark compliance' if default_vpc_count > 0 else 'Network governance improvement'
|
1548
|
+
},
|
1549
|
+
'operational_benefits': {
|
1550
|
+
'simplified_network_topology': True,
|
1551
|
+
'reduced_management_overhead': True,
|
1552
|
+
'improved_monitoring_clarity': True,
|
1553
|
+
'enhanced_incident_response': True
|
1554
|
+
},
|
1555
|
+
'financial_impact': {
|
1556
|
+
'total_annual_savings': sum(c.annual_savings for c in candidates),
|
1557
|
+
'implementation_cost_estimate': 5000, # Conservative estimate
|
1558
|
+
'roi_percentage': ((sum(c.annual_savings for c in candidates) / 5000) * 100) if sum(c.annual_savings for c in candidates) > 0 else 0,
|
1559
|
+
'payback_period_months': max(1, 5000 / max(sum(c.monthly_cost for c in candidates), 1))
|
1560
|
+
}
|
1561
|
+
}
|
1562
|
+
|
1563
|
+
def _calculate_average_timeline(self, candidates: List[VPCCleanupCandidate]) -> str:
|
1564
|
+
"""Calculate average implementation timeline for candidates"""
|
1565
|
+
if not candidates:
|
1566
|
+
return "N/A"
|
1567
|
+
|
1568
|
+
# Simple timeline mapping - in practice, you'd parse the timeline strings
|
1569
|
+
timeline_weeks = {
|
1570
|
+
"1 week": 1,
|
1571
|
+
"1-2 weeks": 1.5,
|
1572
|
+
"2-3 weeks": 2.5,
|
1573
|
+
"3-4 weeks": 3.5,
|
1574
|
+
"6-8 weeks": 7
|
1575
|
+
}
|
1576
|
+
|
1577
|
+
total_weeks = 0
|
1578
|
+
for candidate in candidates:
|
1579
|
+
total_weeks += timeline_weeks.get(candidate.implementation_timeline, 2)
|
1580
|
+
|
1581
|
+
avg_weeks = total_weeks / len(candidates)
|
1582
|
+
|
1583
|
+
if avg_weeks <= 1.5:
|
1584
|
+
return "1-2 weeks"
|
1585
|
+
elif avg_weeks <= 2.5:
|
1586
|
+
return "2-3 weeks"
|
1587
|
+
elif avg_weeks <= 4:
|
1588
|
+
return "3-4 weeks"
|
1589
|
+
else:
|
1590
|
+
return "6-8 weeks"
|
1591
|
+
|
1592
|
+
def _analyze_risk_distribution(self, candidates: List[VPCCleanupCandidate]) -> Dict[str, int]:
    """Count the candidates at each risk level.

    Returns a dict keyed by every ``VPCCleanupRisk`` member's ``value``;
    levels with no matching candidate map to 0.
    """
    return {
        level.value: sum(1 for item in candidates if item.risk_level == level)
        for level in VPCCleanupRisk
    }
|
1598
|
+
|
1599
|
+
def _get_phase_activities(self, phase: VPCCleanupPhase) -> List[str]:
    """Return the key activities for *phase*, or an empty list if it is unknown."""
    activities_by_phase = {
        VPCCleanupPhase.IMMEDIATE: [
            "Execute dependency-zero validation",
            "Obtain required approvals",
            "Perform controlled VPC deletion",
            "Verify cleanup completion",
        ],
        VPCCleanupPhase.INVESTIGATION: [
            "Conduct traffic analysis",
            "Validate business impact",
            "Assess migration requirements",
            "Define elimination strategy",
        ],
        VPCCleanupPhase.GOVERNANCE: [
            "Infrastructure as Code review",
            "Enterprise change approval",
            "Stakeholder coordination",
            "Implementation planning",
        ],
        VPCCleanupPhase.COMPLEX: [
            "Comprehensive dependency mapping",
            "Migration strategy development",
            "Resource relocation planning",
            "Enterprise coordination",
        ],
    }

    # The table is rebuilt on every call, so callers get a fresh list.
    return activities_by_phase.get(phase, [])
|
1629
|
+
|
1630
|
+
def _get_phase_success_criteria(self, phase: VPCCleanupPhase) -> List[str]:
    """Return the success criteria for *phase*, or an empty list if it is unknown."""
    criteria_by_phase = {
        VPCCleanupPhase.IMMEDIATE: [
            "Zero blocking dependencies confirmed",
            "All required approvals obtained",
            "VPCs successfully deleted",
            "No service disruption",
        ],
        VPCCleanupPhase.INVESTIGATION: [
            "Complete traffic analysis",
            "Business impact assessment",
            "Migration plan approved",
            "Stakeholder sign-off",
        ],
        VPCCleanupPhase.GOVERNANCE: [
            "IaC changes implemented",
            "Change management complete",
            "All approvals obtained",
            "Documentation updated",
        ],
        VPCCleanupPhase.COMPLEX: [
            "Dependencies migrated successfully",
            "Zero business disruption",
            "Complete rollback validated",
            "Enterprise approval obtained",
        ],
    }

    # The table is rebuilt on every call, so callers get a fresh list.
    return criteria_by_phase.get(phase, [])
|
1660
|
+
|
1661
|
+
def _get_phase_stakeholders(self, phase: VPCCleanupPhase) -> List[str]:
    """Return the key stakeholders for *phase*, or an empty list if it is unknown."""
    stakeholders_by_phase = {
        VPCCleanupPhase.IMMEDIATE: [
            "Platform Team",
            "Network Engineering",
            "Security Team",
        ],
        VPCCleanupPhase.INVESTIGATION: [
            "Application Teams",
            "Business Owners",
            "Network Engineering",
            "Platform Team",
        ],
        VPCCleanupPhase.GOVERNANCE: [
            "Enterprise Architecture",
            "Change Advisory Board",
            "Platform Team",
            "IaC Team",
        ],
        VPCCleanupPhase.COMPLEX: [
            "Enterprise Architecture",
            "CTO Office",
            "Master Account Stakeholders",
            "Change Control Board",
        ],
    }

    # The table is rebuilt on every call, so callers get a fresh list.
    return stakeholders_by_phase.get(phase, [])
|
1690
|
+
|
1691
|
+
def display_cleanup_analysis(self, candidates: Optional[List[VPCCleanupCandidate]] = None) -> None:
    """Print a summary panel followed by one table per cleanup phase.

    Args:
        candidates: Candidates to display. When omitted or empty, the
            instance's ``cleanup_candidates`` are used instead.

    Prints an error line and returns early if there is nothing to show.
    """
    candidates = candidates or self.cleanup_candidates
    if not candidates:
        self.console.print("[red]❌ No VPC candidates available for display[/red]")
        return

    # Summary panel
    total_vpcs = len(candidates)
    immediate_count = sum(
        1 for item in candidates if item.cleanup_phase == VPCCleanupPhase.IMMEDIATE
    )
    total_savings = sum(item.annual_savings for item in candidates)
    default_count = len([item for item in candidates if item.is_default])
    safety_label = 'ENABLED' if self.safety_mode else 'DISABLED'

    summary = "\n".join([
        "[bold blue]📊 VPC CLEANUP ANALYSIS SUMMARY[/bold blue]",
        f"Total VPCs Analyzed: [yellow]{total_vpcs}[/yellow]",
        f"Immediate Cleanup Ready: [green]{immediate_count}[/green] ({(immediate_count/total_vpcs*100):.1f}%)",
        f"Total Annual Savings: [bold green]${total_savings:,.2f}[/bold green]",
        f"Default VPCs Found: [red]{default_count}[/red]",
        f"Safety Mode: [cyan]{safety_label}[/cyan]",
    ])
    self.console.print(Panel(summary, title="VPC Cleanup Analysis", style="white", width=80))

    # Group candidates by phase, keeping first-seen phase order.
    grouped = {}
    for item in candidates:
        grouped.setdefault(item.cleanup_phase, []).append(item)

    for stage, members in grouped.items():
        if members:
            self._display_phase_candidates(stage, members)
|
1727
|
+
|
1728
|
+
def _display_phase_candidates(self, phase: VPCCleanupPhase, candidates: List[VPCCleanupCandidate]) -> None:
    """Print a header, a candidate table and a one-line summary for *phase*.

    Account IDs are shortened to their last six characters ("N/A" when the
    ID is the literal "unknown"), and VPC names longer than 18 characters
    are cut to 18 with "..." appended.
    """
    # Phase header; phases without a configured colour are shown in white.
    colour_by_phase = {
        VPCCleanupPhase.IMMEDIATE: "green",
        VPCCleanupPhase.INVESTIGATION: "yellow",
        VPCCleanupPhase.GOVERNANCE: "blue",
        VPCCleanupPhase.COMPLEX: "red",
    }
    phase_color = colour_by_phase.get(phase, "white")
    self.console.print(f"\n[bold {phase_color}]🎯 {phase.value} ({len(candidates)} VPCs)[/bold {phase_color}]")

    # Table layout: (header, keyword options) per column.
    column_specs = (
        ("Account", {"style": "cyan", "width": 12}),
        ("VPC ID", {"style": "yellow", "width": 21}),
        ("Name", {"style": "green", "width": 20}),
        ("Default", {"justify": "center", "style": "red", "width": 7}),
        ("Deps", {"justify": "right", "style": "blue", "width": 4}),
        ("Risk", {"style": "magenta", "width": 8}),
        ("Savings", {"justify": "right", "style": "green", "width": 10}),
        ("Timeline", {"style": "cyan", "width": 10}),
    )
    table = Table(show_header=True, header_style="bold magenta")
    for header, options in column_specs:
        table.add_column(header, **options)

    for item in candidates:
        if item.account_id != "unknown":
            account_cell = item.account_id[-6:]
        else:
            account_cell = "N/A"

        raw_name = item.vpc_name or ""
        name_cell = (raw_name or "N/A")[:18]
        if len(raw_name) > 18:
            name_cell += "..."

        table.add_row(
            account_cell,
            item.vpc_id,
            name_cell,
            "✅" if item.is_default else "❌",
            str(item.blocking_dependencies),
            item.risk_level.value,
            f"${item.annual_savings:,.0f}",
            item.implementation_timeline,
        )

    self.console.print(table)

    # Phase summary
    phase_savings = sum(item.annual_savings for item in candidates)
    elevated = (VPCCleanupRisk.HIGH, VPCCleanupRisk.CRITICAL)
    high_risk_total = len([item for item in candidates if item.risk_level in elevated])
    iac_total = len([item for item in candidates if item.iac_managed])

    phase_summary = " | ".join([
        f"Phase Savings: [green]${phase_savings:,.2f}[/green]",
        f"High Risk: [red]{high_risk_total}[/red]",
        f"IaC Managed: [blue]{iac_total}[/blue]",
    ])
    self.console.print(f"[dim]{phase_summary}[/dim]")
|
1776
|
+
|
1777
|
+
def export_cleanup_plan(
    self,
    output_directory: str = "./exports/vpc_cleanup",
    include_dependencies: bool = True
) -> Dict[str, str]:
    """
    Export comprehensive VPC cleanup plan and analysis results

    Writes up to three timestamped files into ``output_directory`` (created
    if missing): the analysis results as JSON (only when
    ``self.analysis_results`` is truthy), plus a candidate-detail JSON and a
    CSV summary (only when ``self.cleanup_candidates`` is non-empty).

    All files are written as UTF-8 explicitly so that non-ASCII VPC names
    cannot fail or be mis-encoded under a non-UTF-8 locale default.

    Args:
        output_directory: Directory to export results
        include_dependencies: Include detailed dependency information

    Returns:
        Dictionary with exported file paths, keyed by 'cleanup_plan',
        'candidates' and 'csv_summary' (only the files actually written)
    """
    import csv

    output_path = Path(output_directory)
    output_path.mkdir(parents=True, exist_ok=True)

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    exported_files = {}

    # Export cleanup plan
    if self.analysis_results:
        plan_file = output_path / f"vpc_cleanup_plan_{timestamp}.json"
        with open(plan_file, 'w', encoding='utf-8') as f:
            # default=str: anything json cannot encode natively is stringified
            json.dump(self.analysis_results, f, indent=2, default=str)
        exported_files['cleanup_plan'] = str(plan_file)

    if self.cleanup_candidates:
        # Export candidate details
        candidates_file = output_path / f"vpc_candidates_{timestamp}.json"
        candidates_data = {
            'metadata': {
                'generated_at': datetime.now().isoformat(),
                'total_candidates': len(self.cleanup_candidates),
                'profile': self.profile,
                'region': self.region,
                'safety_mode': self.safety_mode
            },
            'candidates': []
        }

        for candidate in self.cleanup_candidates:
            candidate_data = self._serialize_candidate(candidate)

            # Add detailed dependencies if requested
            if include_dependencies and candidate.dependencies:
                candidate_data['dependencies'] = [
                    {
                        'resource_type': dep.resource_type,
                        'resource_id': dep.resource_id,
                        'resource_name': dep.resource_name,
                        'dependency_level': dep.dependency_level,
                        'blocking': dep.blocking,
                        'deletion_order': dep.deletion_order,
                        'api_method': dep.api_method,
                        'description': dep.description
                    }
                    for dep in candidate.dependencies
                ]

            candidates_data['candidates'].append(candidate_data)

        with open(candidates_file, 'w', encoding='utf-8') as f:
            json.dump(candidates_data, f, indent=2, default=str)
        exported_files['candidates'] = str(candidates_file)

        # Export CSV summary
        csv_file = output_path / f"vpc_cleanup_summary_{timestamp}.csv"
        fieldnames = [
            'account_id', 'vpc_id', 'vpc_name', 'cidr_block', 'is_default',
            'region', 'blocking_dependencies', 'risk_level', 'cleanup_phase',
            'monthly_cost', 'annual_savings', 'iac_managed', 'approval_required',
            'implementation_timeline'
        ]
        # newline='' lets the csv module control line endings itself
        with open(csv_file, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()

            for candidate in self.cleanup_candidates:
                writer.writerow({
                    'account_id': candidate.account_id,
                    'vpc_id': candidate.vpc_id,
                    'vpc_name': candidate.vpc_name or '',
                    'cidr_block': candidate.cidr_block,
                    'is_default': candidate.is_default,
                    'region': candidate.region,
                    'blocking_dependencies': candidate.blocking_dependencies,
                    'risk_level': candidate.risk_level.value,
                    'cleanup_phase': candidate.cleanup_phase.value,
                    'monthly_cost': candidate.monthly_cost,
                    'annual_savings': candidate.annual_savings,
                    'iac_managed': candidate.iac_managed,
                    'approval_required': candidate.approval_required,
                    'implementation_timeline': candidate.implementation_timeline
                })

        exported_files['csv_summary'] = str(csv_file)

    self.console.print(f"[green]✅ Exported {len(exported_files)} files to {output_directory}[/green]")

    return exported_files
|
1883
|
+
|
1884
|
+
# Performance and Reliability Enhancement Methods
|
1885
|
+
|
1886
|
+
def _perform_health_check(self):
|
1887
|
+
"""Perform comprehensive health check before starting VPC analysis."""
|
1888
|
+
self.console.print("[cyan]🔍 Performing system health check...[/cyan]")
|
1889
|
+
|
1890
|
+
health_issues = []
|
1891
|
+
|
1892
|
+
# Check AWS session
|
1893
|
+
if not self.session:
|
1894
|
+
health_issues.append("No AWS session available")
|
1895
|
+
else:
|
1896
|
+
try:
|
1897
|
+
sts = self.session.client('sts')
|
1898
|
+
identity = sts.get_caller_identity()
|
1899
|
+
self.console.print(f"[green]✅ AWS Session: {identity.get('Account', 'Unknown')}[/green]")
|
1900
|
+
except Exception as e:
|
1901
|
+
health_issues.append(f"AWS session invalid: {e}")
|
1902
|
+
|
1903
|
+
# Check circuit breaker states
|
1904
|
+
open_circuits = [name for name, cb in self.circuit_breakers.items() if cb.state == "open"]
|
1905
|
+
if open_circuits:
|
1906
|
+
health_issues.append(f"Circuit breakers open: {len(open_circuits)}")
|
1907
|
+
self.console.print(f"[yellow]⚠️ Open circuit breakers: {len(open_circuits)}[/yellow]")
|
1908
|
+
else:
|
1909
|
+
self.console.print("[green]✅ All circuit breakers closed[/green]")
|
1910
|
+
|
1911
|
+
# Check thread pool availability
|
1912
|
+
if self.enable_parallel_processing and not self.executor:
|
1913
|
+
health_issues.append("Parallel processing enabled but no executor available")
|
1914
|
+
elif self.executor:
|
1915
|
+
self.console.print(f"[green]✅ Thread pool ready: {self.max_workers} workers[/green]")
|
1916
|
+
|
1917
|
+
# Check cache status
|
1918
|
+
if self.analysis_cache:
|
1919
|
+
cache_size = len(self.analysis_cache.vpc_data)
|
1920
|
+
self.console.print(f"[green]✅ Cache enabled: {cache_size} entries[/green]")
|
1921
|
+
|
1922
|
+
if health_issues:
|
1923
|
+
self.console.print(f"[red]❌ Health issues detected: {len(health_issues)}[/red]")
|
1924
|
+
for issue in health_issues:
|
1925
|
+
self.console.print(f"[red] • {issue}[/red]")
|
1926
|
+
else:
|
1927
|
+
self.console.print("[green]✅ System health check passed[/green]")
|
1928
|
+
|
1929
|
+
def _check_performance_targets(self, metrics):
    """Report a performance error when the analysis exceeded its time target.

    Args:
        metrics: Object exposing a ``duration`` attribute (seconds). A
            missing/zero duration, or one at or below the target, is a no-op.

    When the target is exceeded, an ``ErrorContext`` describing the run is
    built and handed to ``self.exception_handler.handle_performance_error``.
    """
    target_seconds = 30.0  # 30 second target

    if not (metrics.duration and metrics.duration > target_seconds):
        return

    error_context = ErrorContext(
        module_name="vpc",
        operation="performance_check",
        aws_profile=self.profile,
        aws_region=self.region,
        performance_context={
            "execution_time": metrics.duration,
            "target_time": target_seconds,
            "vpcs_analyzed": self.performance_metrics.total_vpcs_analyzed
        }
    )

    self.exception_handler.handle_performance_error(
        "vpc_cleanup_analysis",
        metrics.duration,
        target_seconds,
        error_context
    )
|
1954
|
+
|
1955
|
+
def _display_performance_summary(self):
    """Print a table of performance metrics and, if any apply, recommendations.

    Rows for average time per VPC, cache hit ratio, parallel operations and
    API call efficiency are only added when the corresponding metric is
    available or non-zero. Each row carries a coloured status marker chosen
    from fixed thresholds.
    """
    stats = self.performance_metrics

    table = Table(title="🚀 VPC Analysis Performance Summary")
    for header, alignment, colour in (
        ("Metric", "left", "cyan"),
        ("Value", "right", "white"),
        ("Status", "center", "white"),
    ):
        table.add_column(header, style=colour, justify=alignment)

    # Total execution time
    elapsed = stats.total_execution_time
    table.add_row(
        "Total Execution Time",
        f"{elapsed:.2f}s",
        "🟢" if elapsed <= 30.0 else "🟡",
    )

    # VPCs analyzed
    table.add_row("VPCs Analyzed", str(stats.total_vpcs_analyzed), "📊")

    # Average analysis time per VPC
    per_vpc = stats.average_vpc_analysis_time
    if per_vpc > 0:
        table.add_row(
            "Avg Time per VPC",
            f"{per_vpc:.2f}s",
            "🟢" if per_vpc <= 5.0 else "🟡",
        )

    # Cache performance
    if self.analysis_cache:
        hit_ratio = stats.get_cache_hit_ratio()
        if hit_ratio >= 0.5:
            cache_marker = "🟢"
        elif hit_ratio >= 0.2:
            cache_marker = "🟡"
        else:
            cache_marker = "🔴"
        table.add_row("Cache Hit Ratio", f"{hit_ratio:.1%}", cache_marker)

    # Parallel operations
    if stats.parallel_operations > 0:
        table.add_row("Parallel Operations", str(stats.parallel_operations), "⚡")

    # API call efficiency: share of calls that were served from cache
    all_calls = stats.api_calls_made + stats.api_calls_cached
    if all_calls > 0:
        efficiency = (stats.api_calls_cached / all_calls) * 100
        table.add_row(
            "API Call Efficiency",
            f"{efficiency:.1f}%",
            "🟢" if efficiency >= 20 else "🟡",
        )

    # Error rate
    error_rate = stats.get_error_rate()
    if error_rate == 0:
        error_marker = "🟢"
    elif error_rate <= 0.1:
        error_marker = "🟡"
    else:
        error_marker = "🔴"
    table.add_row("Error Rate", f"{error_rate:.1%}", error_marker)

    self.console.print(table)

    # Performance recommendations
    advice = []

    if stats.total_execution_time > 30.0:
        advice.append("Consider enabling parallel processing for better performance")

    if self.analysis_cache and stats.get_cache_hit_ratio() < 0.2:
        advice.append("Cache hit ratio is low - consider increasing cache TTL")

    if error_rate > 0.1:
        advice.append("High error rate detected - review AWS connectivity and permissions")

    if stats.api_calls_made > 100:
        advice.append("High API usage detected - consider implementing request batching")

    if not advice:
        return

    self.console.print(
        Panel(
            "\n".join([f"• {tip}" for tip in advice]),
            title="⚡ Performance Recommendations",
            border_style="yellow",
        )
    )
|
2048
|
+
|
2049
|
+
def _fallback_analysis(self, vpc_ids: Optional[List[str]], account_profiles: Optional[List[str]]) -> List[VPCCleanupCandidate]:
|
2050
|
+
"""Fallback analysis method with reduced functionality but higher reliability."""
|
2051
|
+
self.console.print("[yellow]🔄 Using fallback analysis mode...[/yellow]")
|
2052
|
+
|
2053
|
+
# Disable advanced features for fallback
|
2054
|
+
original_parallel = self.enable_parallel_processing
|
2055
|
+
original_caching = self.enable_caching
|
2056
|
+
|
2057
|
+
try:
|
2058
|
+
self.enable_parallel_processing = False
|
2059
|
+
self.enable_caching = False
|
2060
|
+
|
2061
|
+
# Use original analysis methods
|
2062
|
+
if account_profiles and len(account_profiles) > 1:
|
2063
|
+
return self._analyze_multi_account_vpcs(account_profiles, vpc_ids)
|
2064
|
+
else:
|
2065
|
+
return self._analyze_single_account_vpcs(vpc_ids)
|
2066
|
+
|
2067
|
+
finally:
|
2068
|
+
# Restore original settings
|
2069
|
+
self.enable_parallel_processing = original_parallel
|
2070
|
+
self.enable_caching = original_caching
|
2071
|
+
|
2072
|
+
def _analyze_multi_account_vpcs_optimized(
|
2073
|
+
self,
|
2074
|
+
account_profiles: List[str],
|
2075
|
+
vpc_ids: Optional[List[str]]
|
2076
|
+
) -> List[VPCCleanupCandidate]:
|
2077
|
+
"""Analyze VPCs across multiple accounts with performance optimization."""
|
2078
|
+
all_candidates = []
|
2079
|
+
|
2080
|
+
self.console.print(f"[cyan]🌐 Multi-account analysis across {len(account_profiles)} accounts with optimization[/cyan]")
|
2081
|
+
|
2082
|
+
# Process accounts in parallel if enabled
|
2083
|
+
if self.enable_parallel_processing and len(account_profiles) > 1:
|
2084
|
+
account_futures = {}
|
2085
|
+
|
2086
|
+
for profile in account_profiles:
|
2087
|
+
future = self.executor.submit(self._analyze_account_with_circuit_breaker, profile, vpc_ids)
|
2088
|
+
account_futures[profile] = future
|
2089
|
+
|
2090
|
+
# Collect results
|
2091
|
+
for profile, future in account_futures.items():
|
2092
|
+
try:
|
2093
|
+
account_candidates = future.result(timeout=300) # 5 minute timeout per account
|
2094
|
+
all_candidates.extend(account_candidates)
|
2095
|
+
except Exception as e:
|
2096
|
+
self.console.print(f"[red]❌ Error analyzing account {profile}: {e}[/red]")
|
2097
|
+
logger.error(f"Multi-account analysis failed for {profile}: {e}")
|
2098
|
+
else:
|
2099
|
+
# Sequential account processing
|
2100
|
+
for profile in account_profiles:
|
2101
|
+
try:
|
2102
|
+
account_candidates = self._analyze_account_with_circuit_breaker(profile, vpc_ids)
|
2103
|
+
all_candidates.extend(account_candidates)
|
2104
|
+
except Exception as e:
|
2105
|
+
self.console.print(f"[red]❌ Error analyzing account {profile}: {e}[/red]")
|
2106
|
+
logger.error(f"Multi-account analysis failed for {profile}: {e}")
|
2107
|
+
|
2108
|
+
self.cleanup_candidates = all_candidates
|
2109
|
+
return all_candidates
|
2110
|
+
|
2111
|
+
def _analyze_account_with_circuit_breaker(self, profile: str, vpc_ids: Optional[List[str]]) -> List[VPCCleanupCandidate]:
    """Analyze single account with circuit breaker protection.

    Looks up the circuit breaker stored under ``account_analysis_<profile>``.
    If it refuses the request, a warning is logged and an empty list is
    returned. Otherwise ``self.session`` is temporarily replaced by a
    session for *profile*, the account's VPCs are analysed, and every
    resulting candidate is stamped with the account ID reported by STS.

    Args:
        profile: AWS profile name of the account to analyse.
        vpc_ids: Optional VPC ID filter passed to the analysis.

    Returns:
        The candidates found in the account ([] when the breaker is open).

    Raises:
        Exception: Any error from session creation or analysis is recorded
            on the circuit breaker, logged, and re-raised.
    """
    circuit_breaker = self.circuit_breakers[f"account_analysis_{profile}"]

    if not circuit_breaker.should_allow_request():
        logger.warning(f"Circuit breaker open for account {profile}, skipping analysis")
        return []

    # Captured before the try block so the finally clause can always restore
    # it, even when session creation itself fails.
    # NOTE(review): swapping self.session is shared mutable state; confirm
    # this is safe when the method runs on several executor threads at once.
    original_session = self.session

    try:
        # Create session for this account
        account_session = create_operational_session(profile=profile)

        # Temporarily update session for analysis
        self.session = account_session

        # Get account ID for tracking
        sts_client = account_session.client('sts')
        account_id = sts_client.get_caller_identity()['Account']

        self.console.print(f"[blue]📋 Analyzing account: {account_id} (profile: {profile})[/blue]")

        # Analyze VPCs in this account using optimized method
        account_candidates = self._analyze_single_account_vpcs_optimized(vpc_ids)

        # Update account ID for all candidates
        for candidate in account_candidates:
            candidate.account_id = account_id

        # Record success
        circuit_breaker.record_success()

        return account_candidates

    except Exception as e:
        circuit_breaker.record_failure()
        logger.error(f"Account analysis failed for {profile}: {e}")
        raise

    finally:
        # Restore original session
        self.session = original_session
|
2153
|
+
|
2154
|
+
def create_rollback_plan(self, candidates: List[VPCCleanupCandidate]) -> Dict[str, Any]:
    """Create comprehensive rollback plan for VPC cleanup operations.

    For every candidate the plan lists the steps needed to undo a deletion:
    the VPC is recreated first, because its dependent resources cannot
    exist without it, followed by the dependencies in reverse deletion
    order. Each dependency's ``api_method`` has ``delete_`` replaced by
    ``create_`` to name the recreation call.

    Args:
        candidates: VPC cleanup candidates to build rollback procedures for.

    Returns:
        The rollback plan dict. It is also appended to
        ``self.rollback_procedures``.
    """
    # One timestamp so plan_id and created_at always describe the same moment.
    created = datetime.now()

    rollback_plan = {
        'plan_id': f"rollback_{created.strftime('%Y%m%d_%H%M%S')}",
        'created_at': created.isoformat(),
        'total_vpcs': len(candidates),
        'rollback_procedures': [],
        'validation_steps': [],
        'emergency_contacts': [],
        'recovery_time_estimate': '4-8 hours'
    }

    for candidate in candidates:
        vpc_rollback = {
            'vpc_id': candidate.vpc_id,
            'account_id': candidate.account_id,
            'region': candidate.region,
            'rollback_steps': [],
            'validation_commands': [],
            'dependencies_to_recreate': []
        }

        # The VPC was deleted last, so it must be recreated first.
        # NOTE(review): candidate.tags is passed through as-is; confirm it
        # already has the TagSpecifications shape the create call expects.
        vpc_rollback['rollback_steps'].append({
            'step': 'Recreate VPC',
            'resource_id': candidate.vpc_id,
            'api_method': 'create_vpc',
            'parameters': {
                'CidrBlock': candidate.cidr_block,
                'TagSpecifications': candidate.tags
            }
        })

        # Then recreate dependencies in the reverse of their deletion order
        for dep in sorted(candidate.dependencies, key=lambda x: x.deletion_order, reverse=True):
            vpc_rollback['rollback_steps'].append({
                'step': f"Recreate {dep.resource_type}",
                'resource_id': dep.resource_id,
                'api_method': dep.api_method.replace('delete_', 'create_'),
                'validation': f"Verify {dep.resource_type} {dep.resource_id} is functional"
            })

        rollback_plan['rollback_procedures'].append(vpc_rollback)

    # Store rollback plan
    self.rollback_procedures.append(rollback_plan)

    return rollback_plan
|
2203
|
+
|
2204
|
+
def get_health_status(self) -> Dict[str, Any]:
    """Return a snapshot of the framework's health indicators.

    The snapshot contains the current timestamp, whether a session object is
    set, the parallel-processing and caching flags, the state of every
    circuit breaker, selected performance metrics, thread-pool health and
    the number of stored rollback plans.
    """
    breaker_report = {
        name: {
            'state': breaker.state,
            'failure_count': breaker.failure_count,
            'last_failure': breaker.last_failure_time,
        }
        for name, breaker in self.circuit_breakers.items()
    }

    # The thread pool only counts as unhealthy when parallel processing is
    # enabled but no executor is present.
    if self.enable_parallel_processing:
        pool_ok = self.executor is not None
    else:
        pool_ok = True

    metrics = self.performance_metrics

    return {
        'timestamp': datetime.now().isoformat(),
        'aws_session_healthy': self.session is not None,
        'parallel_processing_enabled': self.enable_parallel_processing,
        'caching_enabled': self.enable_caching,
        'circuit_breakers': breaker_report,
        'performance_metrics': {
            'total_vpcs_analyzed': metrics.total_vpcs_analyzed,
            'error_rate': metrics.get_error_rate(),
            'cache_hit_ratio': metrics.get_cache_hit_ratio(),
            'average_analysis_time': metrics.average_vpc_analysis_time,
        },
        'thread_pool_healthy': pool_ok,
        'rollback_procedures_available': len(self.rollback_procedures),
    }
|
2229
|
+
|
2230
|
+
# Enhanced Performance and Reliability Methods
|
2231
|
+
|
2232
|
+
def _perform_comprehensive_health_check(self):
    """Run a health check and print the findings to the console.

    Checked in order: the AWS session (through an STS
    ``get_caller_identity`` call), thread pool responsiveness, validity of
    the cached VPC entries, circuit breaker states and the configured
    performance target. Critical findings and performance warnings are
    collected in separate lists and printed at the end. Nothing is
    returned.
    """
    out = self.console
    out.print("[cyan]🔍 Performing comprehensive system health check...[/cyan]")

    critical = []    # findings reported as health issues
    perf_notes = []  # findings reported as performance warnings

    # --- AWS session -----------------------------------------------------
    if self.session:
        try:
            caller = self.session.client('sts').get_caller_identity()
            out.print(f"[green]✅ AWS Session: {caller.get('Account', 'Unknown')}[/green]")
        except Exception as exc:
            critical.append(f"AWS session invalid: {exc}")
    else:
        critical.append("No AWS session available")

    # --- Thread pool -----------------------------------------------------
    if self.enable_parallel_processing:
        if self.executor:
            try:
                # A 0.1 s sleep task has to finish within one second.
                probe = self.executor.submit(lambda: time.sleep(0.1))
                probe.result(timeout=1.0)
                out.print(f"[green]✅ Thread pool responsive: {self.max_workers} workers[/green]")
            except Exception as exc:
                perf_notes.append(f"Thread pool responsiveness issue: {exc}")
        else:
            critical.append("Parallel processing enabled but no executor available")

    # --- Cache -----------------------------------------------------------
    if self.analysis_cache:
        entry_count = len(self.analysis_cache.vpc_data)
        valid_count = 0
        for cached_vpc_id in self.analysis_cache.vpc_data.keys():
            if self.analysis_cache.is_valid(cached_vpc_id):
                valid_count += 1
        # An empty cache divides by one and therefore yields a share of 0.
        valid_share = valid_count / max(entry_count, 1)

        if entry_count > 0 and valid_share < 0.5:
            perf_notes.append(f"Cache health low: {valid_share:.1%} valid entries")
        else:
            out.print(f"[green]✅ Cache system healthy: {entry_count} entries, {valid_share:.1%} valid[/green]")

    # --- Circuit breakers ------------------------------------------------
    tripped = []
    recovering = []
    for breaker_name, breaker in self.circuit_breakers.items():
        if breaker.state == "open":
            tripped.append(breaker_name)
        elif breaker.state == "half-open":
            recovering.append(breaker_name)

    # Open breakers take precedence: recovering ones are only reported when
    # no breaker is open.
    if tripped:
        critical.append(f"Circuit breakers open: {len(tripped)}")
        out.print(f"[red]❌ Open circuit breakers: {len(tripped)}[/red]")
    elif recovering:
        perf_notes.append(f"Circuit breakers recovering: {len(recovering)}")
        out.print(f"[yellow]⚠️ Recovering circuit breakers: {len(recovering)}[/yellow]")
    else:
        out.print("[green]✅ All circuit breakers healthy[/green]")

    # --- Performance target ----------------------------------------------
    if hasattr(self, 'performance_benchmark'):
        configured_target = self.performance_benchmark.config.target_duration
        if configured_target > 30.0:
            perf_notes.append(f"Performance target {configured_target}s exceeds 30s requirement")

    # --- Report ----------------------------------------------------------
    if critical:
        out.print(f"[red]❌ Health issues detected: {len(critical)}[/red]")
        for finding in critical:
            out.print(f"[red] • {finding}[/red]")
    else:
        out.print("[green]✅ All critical systems healthy[/green]")

    if perf_notes:
        out.print(f"[yellow]⚠️ Performance warnings: {len(perf_notes)}[/yellow]")
        for note in perf_notes:
            out.print(f"[yellow] • {note}[/yellow]")
|
2306
|
+
|
2307
|
+
def _validate_performance_targets(self, metrics):
    """Compare the measured analysis duration with the 30 second target.

    Args:
        metrics: Object exposing a ``duration`` attribute in seconds. The
            attribute may be ``None`` when no duration was recorded.

    When the duration exceeds the target, the overrun is reported through
    ``self.exception_handler.handle_performance_error`` together with an
    ``ErrorContext`` describing the run, and optimisation suggestions are
    printed. When the target is met a confirmation line is printed. When
    no duration is available a notice is printed instead; previously this
    case raised ``TypeError`` while formatting ``None`` with ``:.2f``.
    """
    target_time = 30.0  # <30s requirement
    duration = metrics.duration

    if duration is None:
        # Nothing to compare against; report it rather than crash.
        self.console.print("[yellow]⚠️ Performance target not evaluated: no duration recorded[/yellow]")
        return

    if duration > target_time:
        performance_degradation = {
            "execution_time": duration,
            "target_time": target_time,
            "degradation_percentage": ((duration - target_time) / target_time) * 100,
            "vpcs_analyzed": self.performance_metrics.total_vpcs_analyzed,
            "parallel_enabled": self.enable_parallel_processing,
            "cache_enabled": self.enable_caching
        }

        error_context = ErrorContext(
            module_name="vpc",
            operation="performance_validation",
            aws_profile=self.profile,
            aws_region=self.region,
            performance_context=performance_degradation
        )

        self.exception_handler.handle_performance_error(
            "vpc_cleanup_analysis",
            duration,
            target_time,
            error_context
        )

        # Provide performance optimization suggestions
        self._suggest_performance_optimizations(performance_degradation)
    else:
        self.console.print(f"[green]✅ Performance target achieved: {duration:.2f}s ≤ {target_time}s[/green]")
|
2340
|
+
|
2341
|
+
def _suggest_performance_optimizations(self, degradation_data: Dict[str, Any]):
    """Print tuning hints that match how far a run overshot its target.

    Args:
        degradation_data: Mapping read for the keys
            ``degradation_percentage``, ``parallel_enabled``,
            ``cache_enabled`` and ``vpcs_analyzed``; missing keys are
            treated as 0 / falsy.

    Above 50% overshoot the hints cover disabled parallelism, disabled
    caching and large VPC counts; above 25% three general hints are added.
    If any hint applies, they are printed as one yellow-bordered panel;
    otherwise nothing is printed.
    """
    overshoot_pct = degradation_data.get("degradation_percentage", 0)
    hints = []

    # Significant degradation: point at the switches that are turned off
    if overshoot_pct > 50:
        if not degradation_data.get("parallel_enabled"):
            hints.append("Enable parallel processing with 'enable_parallel_processing=True'")
        if not degradation_data.get("cache_enabled"):
            hints.append("Enable caching with 'enable_caching=True'")
        if degradation_data.get("vpcs_analyzed", 0) > 20:
            hints.append("Consider batch processing for large VPC counts")

    # Moderate degradation: general advice (also applies above 50%)
    if overshoot_pct > 25:
        hints.extend([
            "Review AWS API rate limiting and connection pooling",
            "Consider filtering VPC analysis to specific regions",
            "Check network latency to AWS APIs",
        ])

    if not hints:
        return

    bullet_lines = [f"• {hint}" for hint in hints]
    self.console.print(
        Panel(
            "\n".join(bullet_lines),
            title="⚡ Performance Optimization Suggestions",
            border_style="yellow",
        )
    )
|
2367
|
+
|
2368
|
+
def _display_enhanced_performance_summary(self):
    """Print a performance table for the analysis run, then the DORA table.

    Rows cover total execution time, VPC throughput, cache hit ratio (only
    when an analysis cache is set), parallel efficiency (only when
    parallel operations were counted), API call efficiency (only when API
    calls were counted) and system reliability. After printing the table
    the method calls ``_display_dora_metrics_summary``.
    """
    summary = Table(title="🚀 Enhanced VPC Analysis Performance Summary")
    for heading, colour, alignment in (
        ("Performance Metric", "cyan", "left"),
        ("Current Value", "white", "right"),
        ("Target/Status", "white", "center"),
        ("Efficiency", "white", "right"),
    ):
        summary.add_column(heading, style=colour, justify=alignment)

    counters = self.performance_metrics

    # Execution time against the 30 s target (45 s is the yellow limit)
    elapsed = counters.total_execution_time
    if elapsed <= 30.0:
        elapsed_light = "🟢"
    elif elapsed <= 45.0:
        elapsed_light = "🟡"
    else:
        elapsed_light = "🔴"

    if elapsed > 0:
        headroom_pct = max(0, (1 - elapsed / 30.0) * 100)
    else:
        headroom_pct = 100

    summary.add_row(
        "Total Execution Time",
        f"{elapsed:.2f}s",
        f"{elapsed_light} ≤30s",
        f"{headroom_pct:.1f}%"
    )

    # VPC throughput
    # NOTE(review): the divisor is clamped to at least one second, so runs
    # shorter than 1 s show the VPC count rather than a true per-second
    # rate - confirm this is intended.
    if elapsed > 0:
        throughput = counters.total_vpcs_analyzed / max(elapsed, 1)
    else:
        throughput = 0
    summary.add_row(
        "VPC Analysis Throughput",
        f"{throughput:.2f} VPCs/s",
        "📊",
        f"{min(100, throughput * 10):.1f}%"
    )

    # Cache performance
    if self.analysis_cache:
        hit_ratio = counters.get_cache_hit_ratio()
        if hit_ratio >= 0.2:
            cache_light = "🟢"
        elif hit_ratio >= 0.1:
            cache_light = "🟡"
        else:
            cache_light = "🔴"
        summary.add_row(
            "Cache Hit Ratio",
            f"{hit_ratio:.1%}",
            f"{cache_light} ≥20%",
            f"{min(100, hit_ratio * 100):.1f}%"
        )

    # Parallel processing efficiency: operations relative to worker count
    if counters.parallel_operations > 0:
        worker_ratio = counters.parallel_operations / max(self.max_workers, 1)
        parallel_pct = min(100, worker_ratio * 100)
        summary.add_row(
            "Parallel Efficiency",
            f"{counters.parallel_operations} ops",
            f"⚡ {self.max_workers} workers",
            f"{parallel_pct:.1f}%"
        )

    # API efficiency: share of calls that were answered from cache
    api_calls_total = counters.api_calls_made + counters.api_calls_cached
    if api_calls_total > 0:
        cached_pct = (counters.api_calls_cached / api_calls_total) * 100
        if cached_pct >= 20:
            api_light = "🟢"
        elif cached_pct >= 10:
            api_light = "🟡"
        else:
            api_light = "🔴"
        summary.add_row(
            "API Call Efficiency",
            f"{cached_pct:.1f}%",
            f"{api_light} ≥20%",
            f"{cached_pct:.1f}%"
        )

    # Error rate and reliability
    failure_share = counters.get_error_rate()
    reliability_pct = (1 - failure_share) * 100
    if failure_share == 0:
        reliability_light = "🟢"
    elif failure_share <= 0.01:
        reliability_light = "🟡"
    else:
        reliability_light = "🔴"

    summary.add_row(
        "System Reliability",
        f"{reliability_pct:.2f}%",
        f"{reliability_light} >99%",
        f"{reliability_pct:.1f}%"
    )

    self.console.print(summary)

    # DORA metrics summary
    self._display_dora_metrics_summary()
|
2447
|
+
|
2448
|
+
def _display_dora_metrics_summary(self):
    """Print a four-row DORA-style table for the analysis run.

    Rows: lead time (total execution time in minutes), analysis frequency
    (fixed text), change failure rate (error rate as a percentage) and
    mean time to recovery (fixed text with a status indicator).
    """
    report = Table(title="📈 DORA Metrics Summary")
    for heading, colour, alignment in (
        ("DORA Metric", "cyan", "left"),
        ("Current Value", "white", "right"),
        ("Target", "white", "right"),
        ("Status", "white", "center"),
    ):
        report.add_column(heading, style=colour, justify=alignment)

    # Lead Time (analysis completion time), converted to minutes
    lead_minutes = self.performance_metrics.total_execution_time / 60
    if lead_minutes <= 0.5:
        lead_light = "🟢"
    elif lead_minutes <= 1.0:
        lead_light = "🟡"
    else:
        lead_light = "🔴"
    report.add_row("Lead Time", f"{lead_minutes:.1f} min", "≤0.5 min", lead_light)

    # Deployment Frequency (analysis frequency) - a constant, not measured
    frequency_label = "On-demand"
    report.add_row("Analysis Frequency", frequency_label, "On-demand", "🟢")

    # Change Failure Rate
    failure_pct = self.performance_metrics.get_error_rate() * 100
    if failure_pct == 0:
        failure_light = "🟢"
    elif failure_pct <= 1:
        failure_light = "🟡"
    else:
        failure_light = "🔴"
    report.add_row("Change Failure Rate", f"{failure_pct:.1f}%", "≤1%", failure_light)

    # Mean Time to Recovery (theoretical)
    # NOTE(review): this only checks that the attribute exists, not that
    # any rollback procedure is stored - confirm that is the intent.
    if hasattr(self, 'rollback_procedures'):
        recovery_light = "🟢"
    else:
        recovery_light = "🟡"
    report.add_row("Mean Time to Recovery", "≤5 min", "≤15 min", recovery_light)

    self.console.print(report)
|
2497
|
+
|
2498
|
+
def _log_dora_metrics(self, start_time: float, vpcs_analyzed: int, success: bool, error_msg: str = ""):
    """Emit one ``DORA_METRICS:`` log line describing an analysis run.

    Args:
        start_time: ``time.time()`` value taken when the run started; the
            lead time is the difference to the current time.
        vpcs_analyzed: Number of VPCs covered by the run.
        success: Whether the run completed successfully.
        error_msg: Error text to record; empty by default.

    The record is serialised with ``json.dumps`` and written at INFO level
    through the module logger.
    """
    counters = self.performance_metrics

    record = {
        "timestamp": datetime.now().isoformat(),
        "module": "vpc_cleanup",
        "operation": "vpc_analysis",
        "lead_time_seconds": time.time() - start_time,
        "vpcs_analyzed": vpcs_analyzed,
        "success": success,
        "error_message": error_msg,
        "parallel_workers": self.max_workers,
        "caching_enabled": self.enable_caching,
    }
    record["performance_metrics"] = {
        "total_execution_time": counters.total_execution_time,
        "cache_hit_ratio": counters.get_cache_hit_ratio(),
        "error_rate": counters.get_error_rate(),
        "parallel_operations": counters.parallel_operations,
    }

    # Store metrics for external monitoring systems
    payload = json.dumps(record)
    logger.info(f"DORA_METRICS: {payload}")
|
2520
|
+
|
2521
|
+
def _enhanced_fallback_analysis(self, vpc_ids: Optional[List[str]], account_profiles: Optional[List[str]]) -> List[VPCCleanupCandidate]:
    """Re-run the VPC analysis in a more conservative configuration.

    First attempt: keep caching, but halve the worker count when more than
    five workers are configured, otherwise disable parallel processing,
    and call the ``*_optimized`` analysis method. If that raises, parallel
    processing and caching are both disabled and the basic analysis method
    is called instead; an exception from the basic method propagates.

    Args:
        vpc_ids: VPC identifiers passed through to the analysis methods.
        account_profiles: Profiles to analyse; the multi-account methods
            are used only when more than one profile is given.

    Returns:
        Whatever the selected analysis method returns.

    Every setting changed here (``enable_parallel_processing``,
    ``max_workers``, ``enable_caching``) is restored before returning.
    Previously only ``enable_parallel_processing`` was restored, so each
    fallback permanently shrank the worker count and a failed first
    attempt left caching disabled.
    """
    self.console.print("[yellow]🔄 Initiating enhanced fallback analysis with performance optimization...[/yellow]")

    # Snapshot everything this method may modify so `finally` can undo it.
    original_parallel = self.enable_parallel_processing
    original_workers = self.max_workers
    original_caching = self.enable_caching

    multi_account = bool(account_profiles) and len(account_profiles) > 1

    try:
        # Reduce parallel workers but keep some parallelism if possible
        if self.max_workers > 5:
            self.max_workers = max(2, self.max_workers // 2)
            self.console.print(f"[yellow]📉 Reduced thread pool to {self.max_workers} workers for reliability[/yellow]")
        else:
            self.enable_parallel_processing = False
            self.console.print("[yellow]📉 Disabled parallel processing for maximum reliability[/yellow]")

        # Keep caching enabled for performance
        self.console.print("[green]💾 Maintaining cache for performance during fallback[/green]")

        # Use optimized methods with reduced complexity
        if multi_account:
            return self._analyze_multi_account_vpcs_optimized(account_profiles, vpc_ids)
        return self._analyze_single_account_vpcs_optimized(vpc_ids)

    except Exception:
        self.console.print("[red]❌ Enhanced fallback failed, reverting to basic analysis[/red]")
        # Final fallback to original methods, with every optimisation off
        self.enable_parallel_processing = False
        self.enable_caching = False

        if multi_account:
            return self._analyze_multi_account_vpcs(account_profiles, vpc_ids)
        return self._analyze_single_account_vpcs(vpc_ids)

    finally:
        # Restore original settings
        self.enable_parallel_processing = original_parallel
        self.max_workers = original_workers
        self.enable_caching = original_caching
|
2560
|
+
|
2561
|
+
def get_comprehensive_health_status(self) -> Dict[str, Any]:
    """Return an extended health snapshot including an overall score.

    The score starts at 100 and is reduced by 30 when there is no AWS
    session, by 20 (error rate above 0.1) or 10 (above 0.05), and by 15
    for every open circuit breaker; it is reported clamped at 0. Cache
    health is the percentage of cached VPC entries that are still valid
    and stays at 100 when there is no cache or the cache is empty.

    Returns:
        A dictionary with the score, feature flags, per-breaker details,
        performance counters and a ``reliability_metrics`` section whose
        MTTR, availability and performance targets are fixed values.
    """
    breaker_report = {
        breaker_name: {
            'state': breaker.state,
            'failure_count': breaker.failure_count,
            'last_failure': breaker.last_failure_time,
            # Share of the failure budget still unused, floored at zero
            'reliability': max(0, (1 - breaker.failure_count / breaker.failure_threshold)) * 100,
        }
        for breaker_name, breaker in self.circuit_breakers.items()
    }

    # Calculate overall system health score by accumulating penalties
    penalty = 0
    if not self.session:
        penalty += 30

    error_rate = self.performance_metrics.get_error_rate()
    if error_rate > 0.1:
        penalty += 20
    elif error_rate > 0.05:
        penalty += 10

    open_count = sum(1 for breaker in self.circuit_breakers.values() if breaker.state == "open")
    if open_count > 0:
        penalty += open_count * 15

    health_score = 100 - penalty

    cache_health = 100
    if self.analysis_cache:
        cached_entries = self.analysis_cache.vpc_data
        entry_total = len(cached_entries)
        if entry_total > 0:
            still_valid = len([
                vpc_id for vpc_id in cached_entries.keys()
                if self.analysis_cache.is_valid(vpc_id)
            ])
            cache_health = (still_valid / entry_total) * 100

    counters = self.performance_metrics
    api_calls_total = counters.api_calls_made + counters.api_calls_cached
    performance_section = {
        'total_vpcs_analyzed': counters.total_vpcs_analyzed,
        'error_rate': error_rate,
        'cache_hit_ratio': counters.get_cache_hit_ratio(),
        'average_analysis_time': counters.average_vpc_analysis_time,
        'parallel_operations_completed': counters.parallel_operations,
        'api_call_efficiency': (counters.api_calls_cached / max(1, api_calls_total)) * 100,
    }

    # The executor is only inspected when parallel processing is enabled.
    if self.enable_parallel_processing:
        pool_ok = self.executor is not None
    else:
        pool_ok = True

    return {
        'timestamp': datetime.now().isoformat(),
        'overall_health_score': max(0, health_score),
        'aws_session_healthy': self.session is not None,
        'parallel_processing_enabled': self.enable_parallel_processing,
        'parallel_workers': self.max_workers,
        'caching_enabled': self.enable_caching,
        'cache_health_percentage': cache_health,
        'circuit_breakers': breaker_report,
        'performance_metrics': performance_section,
        'thread_pool_healthy': pool_ok,
        'rollback_procedures_available': len(self.rollback_procedures),
        'reliability_metrics': {
            'uptime_percentage': max(0, 100 - error_rate * 100),
            'mttr_estimate_minutes': 5,  # Based on circuit breaker recovery
            'availability_target': 99.9,
            'performance_target_seconds': 30,
        },
    }
|
2625
|
+
|
2626
|
+
def __del__(self):
    """Shut down the worker pool when the framework object is finalised.

    The finaliser also runs for objects whose ``__init__`` raised before
    ``executor`` was assigned, so the attribute is looked up defensively
    instead of assuming it exists; a missing or falsy executor is a no-op.
    """
    executor = getattr(self, 'executor', None)
    if executor:
        # wait=True blocks until pending work has finished.
        executor.shutdown(wait=True)
|