runbooks-1.1.4-py3-none-any.whl → runbooks-1.1.6-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- runbooks/__init__.py +31 -2
- runbooks/__init___optimized.py +18 -4
- runbooks/_platform/__init__.py +1 -5
- runbooks/_platform/core/runbooks_wrapper.py +141 -138
- runbooks/aws2/accuracy_validator.py +812 -0
- runbooks/base.py +7 -0
- runbooks/cfat/assessment/compliance.py +1 -1
- runbooks/cfat/assessment/runner.py +1 -0
- runbooks/cfat/cloud_foundations_assessment.py +227 -239
- runbooks/cli/__init__.py +1 -1
- runbooks/cli/commands/cfat.py +64 -23
- runbooks/cli/commands/finops.py +1005 -54
- runbooks/cli/commands/inventory.py +135 -91
- runbooks/cli/commands/operate.py +9 -36
- runbooks/cli/commands/security.py +42 -18
- runbooks/cli/commands/validation.py +432 -18
- runbooks/cli/commands/vpc.py +81 -17
- runbooks/cli/registry.py +22 -10
- runbooks/cloudops/__init__.py +20 -27
- runbooks/cloudops/base.py +96 -107
- runbooks/cloudops/cost_optimizer.py +544 -542
- runbooks/cloudops/infrastructure_optimizer.py +5 -4
- runbooks/cloudops/interfaces.py +224 -225
- runbooks/cloudops/lifecycle_manager.py +5 -4
- runbooks/cloudops/mcp_cost_validation.py +252 -235
- runbooks/cloudops/models.py +78 -53
- runbooks/cloudops/monitoring_automation.py +5 -4
- runbooks/cloudops/notebook_framework.py +177 -213
- runbooks/cloudops/security_enforcer.py +125 -159
- runbooks/common/accuracy_validator.py +17 -12
- runbooks/common/aws_pricing.py +349 -326
- runbooks/common/aws_pricing_api.py +211 -212
- runbooks/common/aws_profile_manager.py +40 -36
- runbooks/common/aws_utils.py +74 -79
- runbooks/common/business_logic.py +126 -104
- runbooks/common/cli_decorators.py +36 -60
- runbooks/common/comprehensive_cost_explorer_integration.py +455 -463
- runbooks/common/cross_account_manager.py +197 -204
- runbooks/common/date_utils.py +27 -39
- runbooks/common/decorators.py +29 -19
- runbooks/common/dry_run_examples.py +173 -208
- runbooks/common/dry_run_framework.py +157 -155
- runbooks/common/enhanced_exception_handler.py +15 -4
- runbooks/common/enhanced_logging_example.py +50 -64
- runbooks/common/enhanced_logging_integration_example.py +65 -37
- runbooks/common/env_utils.py +16 -16
- runbooks/common/error_handling.py +40 -38
- runbooks/common/lazy_loader.py +41 -23
- runbooks/common/logging_integration_helper.py +79 -86
- runbooks/common/mcp_cost_explorer_integration.py +476 -493
- runbooks/common/mcp_integration.py +99 -79
- runbooks/common/memory_optimization.py +140 -118
- runbooks/common/module_cli_base.py +37 -58
- runbooks/common/organizations_client.py +175 -193
- runbooks/common/patterns.py +23 -25
- runbooks/common/performance_monitoring.py +67 -71
- runbooks/common/performance_optimization_engine.py +283 -274
- runbooks/common/profile_utils.py +111 -37
- runbooks/common/rich_utils.py +315 -141
- runbooks/common/sre_performance_suite.py +177 -186
- runbooks/enterprise/__init__.py +1 -1
- runbooks/enterprise/logging.py +144 -106
- runbooks/enterprise/security.py +187 -204
- runbooks/enterprise/validation.py +43 -56
- runbooks/finops/__init__.py +26 -30
- runbooks/finops/account_resolver.py +1 -1
- runbooks/finops/advanced_optimization_engine.py +980 -0
- runbooks/finops/automation_core.py +268 -231
- runbooks/finops/business_case_config.py +184 -179
- runbooks/finops/cli.py +660 -139
- runbooks/finops/commvault_ec2_analysis.py +157 -164
- runbooks/finops/compute_cost_optimizer.py +336 -320
- runbooks/finops/config.py +20 -20
- runbooks/finops/cost_optimizer.py +484 -618
- runbooks/finops/cost_processor.py +332 -214
- runbooks/finops/dashboard_runner.py +1006 -172
- runbooks/finops/ebs_cost_optimizer.py +991 -657
- runbooks/finops/elastic_ip_optimizer.py +317 -257
- runbooks/finops/enhanced_mcp_integration.py +340 -0
- runbooks/finops/enhanced_progress.py +32 -29
- runbooks/finops/enhanced_trend_visualization.py +3 -2
- runbooks/finops/enterprise_wrappers.py +223 -285
- runbooks/finops/executive_export.py +203 -160
- runbooks/finops/helpers.py +130 -288
- runbooks/finops/iam_guidance.py +1 -1
- runbooks/finops/infrastructure/__init__.py +80 -0
- runbooks/finops/infrastructure/commands.py +506 -0
- runbooks/finops/infrastructure/load_balancer_optimizer.py +866 -0
- runbooks/finops/infrastructure/vpc_endpoint_optimizer.py +832 -0
- runbooks/finops/markdown_exporter.py +337 -174
- runbooks/finops/mcp_validator.py +1952 -0
- runbooks/finops/nat_gateway_optimizer.py +1512 -481
- runbooks/finops/network_cost_optimizer.py +657 -587
- runbooks/finops/notebook_utils.py +226 -188
- runbooks/finops/optimization_engine.py +1136 -0
- runbooks/finops/optimizer.py +19 -23
- runbooks/finops/rds_snapshot_optimizer.py +367 -411
- runbooks/finops/reservation_optimizer.py +427 -363
- runbooks/finops/scenario_cli_integration.py +64 -65
- runbooks/finops/scenarios.py +1277 -438
- runbooks/finops/schemas.py +218 -182
- runbooks/finops/snapshot_manager.py +2289 -0
- runbooks/finops/types.py +3 -3
- runbooks/finops/validation_framework.py +259 -265
- runbooks/finops/vpc_cleanup_exporter.py +189 -144
- runbooks/finops/vpc_cleanup_optimizer.py +591 -573
- runbooks/finops/workspaces_analyzer.py +171 -182
- runbooks/integration/__init__.py +89 -0
- runbooks/integration/mcp_integration.py +1920 -0
- runbooks/inventory/CLAUDE.md +816 -0
- runbooks/inventory/__init__.py +2 -2
- runbooks/inventory/aws_decorators.py +2 -3
- runbooks/inventory/check_cloudtrail_compliance.py +2 -4
- runbooks/inventory/check_controltower_readiness.py +152 -151
- runbooks/inventory/check_landingzone_readiness.py +85 -84
- runbooks/inventory/cloud_foundations_integration.py +144 -149
- runbooks/inventory/collectors/aws_comprehensive.py +1 -1
- runbooks/inventory/collectors/aws_networking.py +109 -99
- runbooks/inventory/collectors/base.py +4 -0
- runbooks/inventory/core/collector.py +495 -313
- runbooks/inventory/core/formatter.py +11 -0
- runbooks/inventory/draw_org_structure.py +8 -9
- runbooks/inventory/drift_detection_cli.py +69 -96
- runbooks/inventory/ec2_vpc_utils.py +2 -2
- runbooks/inventory/find_cfn_drift_detection.py +5 -7
- runbooks/inventory/find_cfn_orphaned_stacks.py +7 -9
- runbooks/inventory/find_cfn_stackset_drift.py +5 -6
- runbooks/inventory/find_ec2_security_groups.py +48 -42
- runbooks/inventory/find_landingzone_versions.py +4 -6
- runbooks/inventory/find_vpc_flow_logs.py +7 -9
- runbooks/inventory/inventory_mcp_cli.py +48 -46
- runbooks/inventory/inventory_modules.py +103 -91
- runbooks/inventory/list_cfn_stacks.py +9 -10
- runbooks/inventory/list_cfn_stackset_operation_results.py +1 -3
- runbooks/inventory/list_cfn_stackset_operations.py +79 -57
- runbooks/inventory/list_cfn_stacksets.py +8 -10
- runbooks/inventory/list_config_recorders_delivery_channels.py +49 -39
- runbooks/inventory/list_ds_directories.py +65 -53
- runbooks/inventory/list_ec2_availability_zones.py +2 -4
- runbooks/inventory/list_ec2_ebs_volumes.py +32 -35
- runbooks/inventory/list_ec2_instances.py +23 -28
- runbooks/inventory/list_ecs_clusters_and_tasks.py +26 -34
- runbooks/inventory/list_elbs_load_balancers.py +22 -20
- runbooks/inventory/list_enis_network_interfaces.py +26 -33
- runbooks/inventory/list_guardduty_detectors.py +2 -4
- runbooks/inventory/list_iam_policies.py +2 -4
- runbooks/inventory/list_iam_roles.py +5 -7
- runbooks/inventory/list_iam_saml_providers.py +4 -6
- runbooks/inventory/list_lambda_functions.py +38 -38
- runbooks/inventory/list_org_accounts.py +6 -8
- runbooks/inventory/list_org_accounts_users.py +55 -44
- runbooks/inventory/list_rds_db_instances.py +31 -33
- runbooks/inventory/list_rds_snapshots_aggregator.py +192 -208
- runbooks/inventory/list_route53_hosted_zones.py +3 -5
- runbooks/inventory/list_servicecatalog_provisioned_products.py +37 -41
- runbooks/inventory/list_sns_topics.py +2 -4
- runbooks/inventory/list_ssm_parameters.py +4 -7
- runbooks/inventory/list_vpc_subnets.py +2 -4
- runbooks/inventory/list_vpcs.py +7 -10
- runbooks/inventory/mcp_inventory_validator.py +554 -468
- runbooks/inventory/mcp_vpc_validator.py +359 -442
- runbooks/inventory/organizations_discovery.py +63 -55
- runbooks/inventory/recover_cfn_stack_ids.py +7 -8
- runbooks/inventory/requirements.txt +0 -1
- runbooks/inventory/rich_inventory_display.py +35 -34
- runbooks/inventory/run_on_multi_accounts.py +3 -5
- runbooks/inventory/unified_validation_engine.py +281 -253
- runbooks/inventory/verify_ec2_security_groups.py +1 -1
- runbooks/inventory/vpc_analyzer.py +735 -697
- runbooks/inventory/vpc_architecture_validator.py +293 -348
- runbooks/inventory/vpc_dependency_analyzer.py +384 -380
- runbooks/inventory/vpc_flow_analyzer.py +1 -1
- runbooks/main.py +49 -34
- runbooks/main_final.py +91 -60
- runbooks/main_minimal.py +22 -10
- runbooks/main_optimized.py +131 -100
- runbooks/main_ultra_minimal.py +7 -2
- runbooks/mcp/__init__.py +36 -0
- runbooks/mcp/integration.py +679 -0
- runbooks/monitoring/performance_monitor.py +9 -4
- runbooks/operate/dynamodb_operations.py +3 -1
- runbooks/operate/ec2_operations.py +145 -137
- runbooks/operate/iam_operations.py +146 -152
- runbooks/operate/networking_cost_heatmap.py +29 -8
- runbooks/operate/rds_operations.py +223 -254
- runbooks/operate/s3_operations.py +107 -118
- runbooks/operate/vpc_operations.py +646 -616
- runbooks/remediation/base.py +1 -1
- runbooks/remediation/commons.py +10 -7
- runbooks/remediation/commvault_ec2_analysis.py +70 -66
- runbooks/remediation/ec2_unattached_ebs_volumes.py +1 -0
- runbooks/remediation/multi_account.py +24 -21
- runbooks/remediation/rds_snapshot_list.py +86 -60
- runbooks/remediation/remediation_cli.py +92 -146
- runbooks/remediation/universal_account_discovery.py +83 -79
- runbooks/remediation/workspaces_list.py +46 -41
- runbooks/security/__init__.py +19 -0
- runbooks/security/assessment_runner.py +1150 -0
- runbooks/security/baseline_checker.py +812 -0
- runbooks/security/cloudops_automation_security_validator.py +509 -535
- runbooks/security/compliance_automation_engine.py +17 -17
- runbooks/security/config/__init__.py +2 -2
- runbooks/security/config/compliance_config.py +50 -50
- runbooks/security/config_template_generator.py +63 -76
- runbooks/security/enterprise_security_framework.py +1 -1
- runbooks/security/executive_security_dashboard.py +519 -508
- runbooks/security/multi_account_security_controls.py +959 -1210
- runbooks/security/real_time_security_monitor.py +422 -444
- runbooks/security/security_baseline_tester.py +1 -1
- runbooks/security/security_cli.py +143 -112
- runbooks/security/test_2way_validation.py +439 -0
- runbooks/security/two_way_validation_framework.py +852 -0
- runbooks/sre/production_monitoring_framework.py +167 -177
- runbooks/tdd/__init__.py +15 -0
- runbooks/tdd/cli.py +1071 -0
- runbooks/utils/__init__.py +14 -17
- runbooks/utils/logger.py +7 -2
- runbooks/utils/version_validator.py +50 -47
- runbooks/validation/__init__.py +6 -6
- runbooks/validation/cli.py +9 -3
- runbooks/validation/comprehensive_2way_validator.py +745 -704
- runbooks/validation/mcp_validator.py +906 -228
- runbooks/validation/terraform_citations_validator.py +104 -115
- runbooks/validation/terraform_drift_detector.py +461 -454
- runbooks/vpc/README.md +617 -0
- runbooks/vpc/__init__.py +8 -1
- runbooks/vpc/analyzer.py +577 -0
- runbooks/vpc/cleanup_wrapper.py +476 -413
- runbooks/vpc/cli_cloudtrail_commands.py +339 -0
- runbooks/vpc/cli_mcp_validation_commands.py +480 -0
- runbooks/vpc/cloudtrail_audit_integration.py +717 -0
- runbooks/vpc/config.py +92 -97
- runbooks/vpc/cost_engine.py +411 -148
- runbooks/vpc/cost_explorer_integration.py +553 -0
- runbooks/vpc/cross_account_session.py +101 -106
- runbooks/vpc/enhanced_mcp_validation.py +917 -0
- runbooks/vpc/eni_gate_validator.py +961 -0
- runbooks/vpc/heatmap_engine.py +185 -160
- runbooks/vpc/mcp_no_eni_validator.py +680 -639
- runbooks/vpc/nat_gateway_optimizer.py +358 -0
- runbooks/vpc/networking_wrapper.py +15 -8
- runbooks/vpc/pdca_remediation_planner.py +528 -0
- runbooks/vpc/performance_optimized_analyzer.py +219 -231
- runbooks/vpc/runbooks_adapter.py +1167 -241
- runbooks/vpc/tdd_red_phase_stubs.py +601 -0
- runbooks/vpc/test_data_loader.py +358 -0
- runbooks/vpc/tests/conftest.py +314 -4
- runbooks/vpc/tests/test_cleanup_framework.py +1022 -0
- runbooks/vpc/tests/test_cost_engine.py +0 -2
- runbooks/vpc/topology_generator.py +326 -0
- runbooks/vpc/unified_scenarios.py +1297 -1124
- runbooks/vpc/vpc_cleanup_integration.py +1943 -1115
- runbooks-1.1.6.dist-info/METADATA +327 -0
- runbooks-1.1.6.dist-info/RECORD +489 -0
- runbooks/finops/README.md +0 -414
- runbooks/finops/accuracy_cross_validator.py +0 -647
- runbooks/finops/business_cases.py +0 -950
- runbooks/finops/dashboard_router.py +0 -922
- runbooks/finops/ebs_optimizer.py +0 -973
- runbooks/finops/embedded_mcp_validator.py +0 -1629
- runbooks/finops/enhanced_dashboard_runner.py +0 -527
- runbooks/finops/finops_dashboard.py +0 -584
- runbooks/finops/finops_scenarios.py +0 -1218
- runbooks/finops/legacy_migration.py +0 -730
- runbooks/finops/multi_dashboard.py +0 -1519
- runbooks/finops/single_dashboard.py +0 -1113
- runbooks/finops/unlimited_scenarios.py +0 -393
- runbooks-1.1.4.dist-info/METADATA +0 -800
- runbooks-1.1.4.dist-info/RECORD +0 -468
- {runbooks-1.1.4.dist-info → runbooks-1.1.6.dist-info}/WHEEL +0 -0
- {runbooks-1.1.4.dist-info → runbooks-1.1.6.dist-info}/entry_points.txt +0 -0
- {runbooks-1.1.4.dist-info → runbooks-1.1.6.dist-info}/licenses/LICENSE +0 -0
- {runbooks-1.1.4.dist-info → runbooks-1.1.6.dist-info}/top_level.txt +0 -0
runbooks/common/performance_optimization_engine.py

@@ -1,13 +1,13 @@
 #!/usr/bin/env python3
 """
 Performance Optimization Engine for CloudOps-Runbooks - Phase 2 Enhanced
-
+
 🎯 SRE Automation Specialist Implementation
 Following proven systematic delegation patterns for production reliability optimization.
 
 Key Focus Areas (From PDCA Analysis):
 1. Organization Discovery Performance: 52.3s -> <30s target
-2. VPC Analysis Timeout Issues: Optimize network operations
+2. VPC Analysis Timeout Issues: Optimize network operations
 3. Memory Usage Optimization: Address large-scale operation issues (6.6GB → <500MB)
 4. Multi-Account Scaling: 200+ account enterprise support with concurrent processing
 5. Reliability Enhancements: >99.9% operation success rate with circuit breaker patterns
@@ -49,7 +49,7 @@ from rich.progress import (
     BarColumn,
     TimeElapsedColumn,
     TaskProgressColumn,
-    MofNCompleteColumn
+    MofNCompleteColumn,
 )
 from rich.status import Status
 from rich.panel import Panel
@@ -63,7 +63,7 @@ from runbooks.common.rich_utils import (
     print_error,
     create_table,
     create_progress_bar,
-    STATUS_INDICATORS
+    STATUS_INDICATORS,
 )
 
 logger = logging.getLogger(__name__)
@@ -72,6 +72,7 @@ logger = logging.getLogger(__name__)
 @dataclass
 class OptimizationMetrics:
     """Performance optimization metrics tracking"""
+
     operation_name: str
     start_time: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
     end_time: Optional[datetime] = None
@@ -108,14 +109,14 @@
 
 class IntelligentCache:
     """Intelligent caching system with TTL management and memory optimization"""
-
+
     def __init__(self, default_ttl_minutes: int = 30, max_cache_size: int = 1000):
         self.cache: Dict[str, Dict] = {}
         self.cache_timestamps: Dict[str, datetime] = {}
         self.default_ttl_minutes = default_ttl_minutes
         self.max_cache_size = max_cache_size
         self._lock = threading.RLock()
-
+
         # Performance tracking
         self.hits = 0
         self.misses = 0
@@ -126,18 +127,18 @@
             if key not in self.cache:
                 self.misses += 1
                 return None
-
+
             # Check TTL
             ttl = ttl_minutes or self.default_ttl_minutes
             cache_age = (datetime.now(timezone.utc) - self.cache_timestamps[key]).total_seconds() / 60
-
+
             if cache_age > ttl:
                 # Cache expired
                 del self.cache[key]
                 del self.cache_timestamps[key]
                 self.misses += 1
                 return None
-
+
             self.hits += 1
             return self.cache[key]
 
@@ -147,7 +148,7 @@
             # Clean up if at max capacity
             if len(self.cache) >= self.max_cache_size:
                 self._cleanup_oldest_entries(int(self.max_cache_size * 0.2))  # Remove 20%
-
+
             self.cache[key] = value
             self.cache_timestamps[key] = datetime.now(timezone.utc)
 
@@ -172,25 +173,26 @@
         """Get cache performance statistics"""
         total_requests = self.hits + self.misses
         hit_rate = (self.hits / total_requests * 100) if total_requests > 0 else 0.0
-
+
         return {
             "size": len(self.cache),
             "hits": self.hits,
             "misses": self.misses,
             "hit_rate": hit_rate,
-            "max_size": self.max_cache_size
+            "max_size": self.max_cache_size,
         }
 
 
 @dataclass
 class CircuitBreakerState:
     """Circuit breaker state tracking for reliability patterns"""
+
     failure_count: int = 0
     last_failure_time: Optional[datetime] = None
     state: str = "closed"  # closed, open, half_open
     success_count: int = 0
     total_requests: int = 0
-
+
     def calculate_failure_rate(self) -> float:
         """Calculate failure rate percentage"""
         if self.total_requests == 0:
@@ -201,20 +203,17 @@
 class CircuitBreaker:
     """
     Circuit breaker implementation for AWS API reliability
-
+
     Provides >99.9% operation success rate through:
     - Automatic failure detection and recovery
     - Graceful degradation patterns
     - Exponential backoff with jitter
     """
-
-    def __init__(self,
-                 failure_threshold: int = 5,
-                 recovery_timeout_seconds: int = 60,
-                 success_threshold: int = 3):
+
+    def __init__(self, failure_threshold: int = 5, recovery_timeout_seconds: int = 60, success_threshold: int = 3):
        """
        Initialize circuit breaker
-
+
        Args:
            failure_threshold: Number of failures before opening circuit
            recovery_timeout_seconds: Time to wait before attempting recovery
@@ -223,7 +222,7 @@
         self.failure_threshold = failure_threshold
         self.recovery_timeout_seconds = recovery_timeout_seconds
         self.success_threshold = success_threshold
-
+
         self.state = CircuitBreakerState()
         self._lock = threading.RLock()
 
@@ -231,7 +230,7 @@
     def protected_call(self, operation_name: str = "aws_operation"):
         """
         Context manager for circuit breaker protected operations
-
+
         Args:
             operation_name: Name of operation for logging
         """
@@ -240,21 +239,23 @@
             if self._should_open_circuit():
                 self.state.state = "open"
                 self.state.last_failure_time = datetime.now(timezone.utc)
-
+
             # Check if circuit can transition to half-open
             if self._can_attempt_recovery():
                 self.state.state = "half_open"
                 console.log(f"[yellow]🔄 Circuit breaker half-open for {operation_name}[/yellow]")
-
+
         # Block requests if circuit is open
         if self.state.state == "open":
             time_since_failure = (datetime.now(timezone.utc) - self.state.last_failure_time).total_seconds()
             if time_since_failure < self.recovery_timeout_seconds:
-                raise Exception(f"Circuit breaker OPEN for {operation_name} - recovery in {self.recovery_timeout_seconds - time_since_failure:.1f}s")
+                raise Exception(
+                    f"Circuit breaker OPEN for {operation_name} - recovery in {self.recovery_timeout_seconds - time_since_failure:.1f}s"
+                )
 
         try:
             yield
-
+
             # Success - update state
             with self._lock:
                 if self.state.state == "half_open":
@@ -263,35 +264,37 @@
                         self.state.state = "closed"
                         self.state.failure_count = 0
                         self.state.success_count = 0
-                        console.log(f"[green]✅ Circuit breaker CLOSED for {operation_name} - service recovered[/green]")
-
+                        console.log(
+                            f"[green]✅ Circuit breaker CLOSED for {operation_name} - service recovered[/green]"
+                        )
+
                 self.state.total_requests += 1
-
+
         except Exception as e:
             # Failure - update state
             with self._lock:
                 self.state.failure_count += 1
                 self.state.total_requests += 1
                 self.state.last_failure_time = datetime.now(timezone.utc)
-
+
                 if self.state.state == "half_open":
                     self.state.state = "open"
                     console.log(f"[red]🚨 Circuit breaker OPEN for {operation_name} - recovery attempt failed[/red]")
-
+
             raise
 
     def _should_open_circuit(self) -> bool:
         """Check if circuit should be opened based on failure rate"""
         if self.state.state != "closed":
             return False
-
+
         return self.state.failure_count >= self.failure_threshold
 
     def _can_attempt_recovery(self) -> bool:
         """Check if recovery can be attempted"""
         if self.state.state != "open" or not self.state.last_failure_time:
             return False
-
+
         time_since_failure = (datetime.now(timezone.utc) - self.state.last_failure_time).total_seconds()
         return time_since_failure >= self.recovery_timeout_seconds
 
@@ -308,28 +311,28 @@
 
 class OptimizedAWSClientPool:
     """Connection pooling and optimized AWS client management with circuit breaker patterns"""
-
+
     def __init__(self, max_pool_connections: int = 100):
         self.max_pool_connections = max_pool_connections
         self.clients: Dict[str, boto3.client] = {}
         self.sessions: Dict[str, boto3.Session] = {}
         self.circuit_breakers: Dict[str, CircuitBreaker] = {}
         self._lock = threading.RLock()
-
+
         # Optimized botocore configuration with enhanced retry logic
         self.config = Config(
             max_pool_connections=max_pool_connections,
-            retries={'max_attempts': 3, 'mode': 'adaptive'},
+            retries={"max_attempts": 3, "mode": "adaptive"},
             tcp_keepalive=True,
-            region_name='us-east-1',  # Default region for global services
+            region_name="us-east-1",  # Default region for global services
             read_timeout=30,  # 30 second read timeout
             connect_timeout=10,  # 10 second connection timeout
         )
 
-    def get_client(self, service: str, profile: str, region: str = 'us-east-1') -> boto3.client:
+    def get_client(self, service: str, profile: str, region: str = "us-east-1") -> boto3.client:
         """Get optimized AWS client with connection pooling and circuit breaker protection"""
         client_key = f"{service}_{profile}_{region}"
-
+
         with self._lock:
             if client_key not in self.clients:
                 # Create circuit breaker for this service/region combination
@@ -337,38 +340,36 @@
                 self.circuit_breakers[client_key] = CircuitBreaker(
                     failure_threshold=3,  # Open after 3 failures
                     recovery_timeout_seconds=30,  # Attempt recovery after 30s
-                    success_threshold=2  # Close after 2 successes
+                    success_threshold=2,  # Close after 2 successes
                 )
-
+
                 # Create session if not exists
                 session_key = f"{profile}_{region}"
                 if session_key not in self.sessions:
                     self.sessions[session_key] = boto3.Session(profile_name=profile)
-
+
                 # Create client with optimized config
                 self.clients[client_key] = self.sessions[session_key].client(
-                    service,
-                    config=self.config,
-                    region_name=region
+                    service, config=self.config, region_name=region
                 )
-
+
             return self.clients[client_key]
 
     def protected_api_call(self, client_key: str, api_call: Callable, *args, **kwargs):
         """
         Execute AWS API call with circuit breaker protection
-
+
         Args:
             client_key: Client identifier for circuit breaker tracking
             api_call: AWS API method to call
             *args, **kwargs: Arguments for the API call
-
+
         Returns:
             API call result with circuit breaker protection
         """
         if client_key not in self.circuit_breakers:
             self.circuit_breakers[client_key] = CircuitBreaker()
-
+
         with self.circuit_breakers[client_key].protected_call(f"aws_{client_key}"):
             return api_call(*args, **kwargs)
 
@@ -377,20 +378,26 @@
         status = {}
         for client_key, breaker in self.circuit_breakers.items():
             status[client_key] = breaker.get_state_info()
-
+
         # Calculate overall reliability metrics
         total_requests = sum(breaker.state.total_requests for breaker in self.circuit_breakers.values())
         total_failures = sum(breaker.state.failure_count for breaker in self.circuit_breakers.values())
-
-        overall_success_rate = ((total_requests - total_failures) / total_requests * 100) if total_requests > 0 else 100.0
-
+
+        overall_success_rate = (
+            ((total_requests - total_failures) / total_requests * 100) if total_requests > 0 else 100.0
+        )
+
         return {
             "circuit_breakers": status,
             "overall_success_rate": overall_success_rate,
             "total_requests": total_requests,
             "total_failures": total_failures,
             "target_success_rate": 99.9,
-            "reliability_status": "excellent" if overall_success_rate >= 99.9 else "good" if overall_success_rate >= 95.0 else "needs_improvement"
+            "reliability_status": "excellent"
+            if overall_success_rate >= 99.9
+            else "good"
+            if overall_success_rate >= 95.0
+            else "needs_improvement",
         }
 
     def get_session(self, profile: str) -> boto3.Session:
@@ -410,7 +417,7 @@
 class PerformanceOptimizationEngine:
     """
     Enterprise performance optimization engine for CloudOps-Runbooks
-
+
     Implements SRE automation patterns for:
     - Organization discovery optimization (52.3s -> <30s)
     - VPC analysis performance improvements
@@ -418,13 +425,12 @@
     - Intelligent caching and connection pooling
     """
 
-    def __init__(self,
-                 max_workers: int = 20,
-                 cache_ttl_minutes: int = 30,
-                 memory_limit_mb: int = 512):  # Phase 2: Reduced from 2048MB to 512MB target
+    def __init__(
+        self, max_workers: int = 20, cache_ttl_minutes: int = 30, memory_limit_mb: int = 512
+    ):  # Phase 2: Reduced from 2048MB to 512MB target
         """
         Initialize performance optimization engine
-
+
         Args:
             max_workers: Maximum concurrent workers for parallel operations
             cache_ttl_minutes: Cache TTL in minutes
@@ -432,23 +438,23 @@
         """
         self.max_workers = max_workers
         self.memory_limit_mb = memory_limit_mb
-
+
         # Core optimization components
         self.cache = IntelligentCache(
             default_ttl_minutes=cache_ttl_minutes,
-            max_cache_size=500  # Phase 2: Reduced cache size for memory optimization
+            max_cache_size=500,  # Phase 2: Reduced cache size for memory optimization
         )
         self.client_pool = OptimizedAWSClientPool(max_pool_connections=50)
-
+
         # Performance tracking
         self.metrics: List[OptimizationMetrics] = []
         self.current_operation: Optional[OptimizationMetrics] = None
-
+
         # Phase 2: Enhanced memory monitoring
         self.process = psutil.Process()
         self.memory_monitoring_active = False
         self.memory_optimization_active = True
-
+
         # Phase 2: Multi-account scaling configuration
         self.enterprise_scaling_enabled = True
         self.adaptive_batch_sizing = True
@@ -458,38 +464,35 @@
     def optimize_operation(self, operation_name: str, target_seconds: float = 30.0):
         """
         Context manager for optimized operation execution with monitoring
-
+
         Args:
             operation_name: Name of the operation being optimized
             target_seconds: Target completion time in seconds
         """
         # Start operation metrics tracking
-        metrics = OptimizationMetrics(
-            operation_name=operation_name,
-            target_seconds=target_seconds
-        )
+        metrics = OptimizationMetrics(operation_name=operation_name, target_seconds=target_seconds)
         self.current_operation = metrics
-
+
         # Start memory monitoring
         self._start_memory_monitoring()
-
+
         # Enhanced progress indicator for long operations
         with Status(f"[cyan]🚀 Optimizing: {operation_name}[/cyan]", console=console):
             try:
                 console.log(f"[dim]Starting optimized {operation_name} (target: {target_seconds}s)[/]")
-
+
                 yield metrics
-
+
                 # Mark as successful
                 metrics.finish(success=True)
                 self._log_optimization_results(metrics)
-
+
             except Exception as e:
                 # Handle failure
                 metrics.finish(success=False, error_message=str(e))
                 print_error(f"Optimization failed for {operation_name}", e)
                 raise
-
+
             finally:
                 # Stop monitoring and store results
                 self._stop_memory_monitoring()
@@ -499,51 +502,55 @@
     def _start_memory_monitoring(self):
         """Start background memory usage monitoring with Phase 2 aggressive optimization"""
         self.memory_monitoring_active = True
-
+
         def monitor_memory():
             peak_memory = 0.0
             cleanup_counter = 0
-
+
             while self.memory_monitoring_active and self.current_operation:
                 try:
                     current_memory = self.process.memory_info().rss / (1024 * 1024)  # MB
                     peak_memory = max(peak_memory, current_memory)
                     self.current_operation.memory_peak_mb = peak_memory
-
+
                     # Phase 2: Aggressive memory management at 80% threshold
                     memory_threshold_80 = self.memory_limit_mb * 0.8
                     memory_threshold_90 = self.memory_limit_mb * 0.9
-
+
                     if current_memory > memory_threshold_90:
-                        console.log(f"[red]🚨 CRITICAL: Memory usage ({current_memory:.1f}MB) at 90% limit ({self.memory_limit_mb}MB)[/red]")
+                        console.log(
+                            f"[red]🚨 CRITICAL: Memory usage ({current_memory:.1f}MB) at 90% limit ({self.memory_limit_mb}MB)[/red]"
+                        )
                         if self.auto_memory_cleanup:
                             self._aggressive_memory_cleanup()
-
+
                     elif current_memory > memory_threshold_80:
-                        console.log(f"[yellow]⚠️ WARNING: Memory usage ({current_memory:.1f}MB) at 80% limit ({self.memory_limit_mb}MB)[/yellow]")
+                        console.log(
+                            f"[yellow]⚠️ WARNING: Memory usage ({current_memory:.1f}MB) at 80% limit ({self.memory_limit_mb}MB)[/yellow]"
+                        )
                         if self.auto_memory_cleanup and cleanup_counter % 5 == 0:  # Every 5 seconds at 80%
                             self._proactive_memory_cleanup()
-
+
                     # Phase 2: Proactive cleanup every 10 seconds
                     cleanup_counter += 1
                     if self.auto_memory_cleanup and cleanup_counter % 10 == 0:
                         gc.collect()
-
+
                     time.sleep(1)  # Check every second
                 except Exception:
                     break
-
+
         self.memory_thread = threading.Thread(target=monitor_memory, daemon=True)
         self.memory_thread.start()
 
     def _proactive_memory_cleanup(self):
         """Proactive memory cleanup at 80% threshold"""
         console.log("[dim]🧹 Proactive memory cleanup initiated[/dim]")
-
+
         # Clear old cache entries
-        if hasattr(self.cache, '_cleanup_oldest_entries'):
+        if hasattr(self.cache, "_cleanup_oldest_entries"):
             self.cache._cleanup_oldest_entries(int(self.cache.max_cache_size * 0.1))  # Clear 10%
-
+
         # Force garbage collection
         collected = gc.collect()
         if collected > 0:
@@ -552,19 +559,19 @@
     def _aggressive_memory_cleanup(self):
         """Aggressive memory cleanup at 90% threshold"""
         console.log("[red]🚨 Aggressive memory cleanup initiated[/red]")
-
+
         # Clear significant cache entries
-        if hasattr(self.cache, '_cleanup_oldest_entries'):
+        if hasattr(self.cache, "_cleanup_oldest_entries"):
             self.cache._cleanup_oldest_entries(int(self.cache.max_cache_size * 0.3))  # Clear 30%
-
+
         # Multiple GC passes
         total_collected = 0
         for i in range(3):
             collected = gc.collect(i)
             total_collected += collected
-
+
         console.log(f"[yellow]🗑️ Emergency cleanup collected {total_collected} objects[/yellow]")
-
+
         # Update optimization applied list
         if self.current_operation:
             self.current_operation.optimization_applied.append("aggressive_memory_cleanup")
@@ -577,7 +584,7 @@
         """Log optimization results with rich formatting"""
         improvement = metrics.get_performance_improvement()
         cache_efficiency = metrics.get_cache_efficiency()
-
+
         if metrics.success:
             if metrics.duration_seconds <= metrics.target_seconds:
                 print_success(
@@ -589,33 +596,35 @@
                     f"{metrics.operation_name} completed in {metrics.duration_seconds:.1f}s "
                     f"(target: {metrics.target_seconds:.1f}s)"
                 )
-
+
             # Log optimization details
             if metrics.optimization_applied:
                 console.log(f"[dim]Optimizations applied: {', '.join(metrics.optimization_applied)}[/]")
-
+
             if cache_efficiency > 0:
-                console.log(f"[dim]Cache efficiency: {cache_efficiency:.1f}% ({metrics.cache_hits} hits, {metrics.cache_misses} misses)[/]")
+                console.log(
+                    f"[dim]Cache efficiency: {cache_efficiency:.1f}% ({metrics.cache_hits} hits, {metrics.cache_misses} misses)[/]"
+                )
 
-    def optimize_organization_discovery(self,
-                                        management_profile: str,
-                                        use_parallel_processing: bool = True,
-                                        batch_size: int = 20) -> Callable:
+    def optimize_organization_discovery(
+        self, management_profile: str, use_parallel_processing: bool = True, batch_size: int = 20
+    ) -> Callable:
         """
         Optimize organization discovery operations
-
+
         Addresses: Organization Discovery Performance (52.3s -> <30s target)
-
+
         Returns optimized function with:
         - Intelligent caching for Organizations API calls
        - Parallel account processing
        - Memory-efficient batch processing
        - Connection pooling
        """
+
        def optimized_discover_accounts():
            """Optimized account discovery with caching and parallel processing"""
            cache_key = f"org_accounts_{management_profile}"
-
+
            # Check cache first
            cached_result = self.cache.get(cache_key, ttl_minutes=15)  # Shorter TTL for critical data
            if cached_result and self.current_operation:
@@ -623,53 +632,55 @@
                 self.current_operation.optimization_applied.append("intelligent_caching")
                 console.log("[blue]🚀 Using cached organization data for optimal performance[/blue]")
                 return cached_result
-
+
             if self.current_operation:
                 self.current_operation.cache_misses += 1
-
+
             # Perform optimized discovery
             try:
                 # Get optimized Organizations client
-                org_client = self.client_pool.get_client('organizations', management_profile)
-
+                org_client = self.client_pool.get_client("organizations", management_profile)
+
                 accounts = []
-                paginator = org_client.get_paginator('list_accounts')
-
+                paginator = org_client.get_paginator("list_accounts")
+
                 # Track API calls
                 api_calls = 0
-
+
                 # Use parallel processing for account details if enabled
                 if use_parallel_processing:
                     if self.current_operation:
                         self.current_operation.optimization_applied.append("parallel_processing")
-
+
                     accounts = self._process_accounts_parallel(paginator, org_client, batch_size)
                 else:
                     # Sequential processing (fallback)
                     for page in paginator.paginate():
-                        accounts.extend(page['Accounts'])
+                        accounts.extend(page["Accounts"])
                         api_calls += 1
-
+
                         # Trigger garbage collection periodically for memory efficiency
                         if api_calls % 10 == 0:
                             gc.collect()
-
+
                 if self.current_operation:
                     self.current_operation.api_calls_made = api_calls
                     self.current_operation.optimization_applied.append("connection_pooling")
-
+
                 # Cache the result
                 result = {
-                    'accounts': accounts,
-                    'total_count': len(accounts),
-                    'discovery_method': 'optimized_organizations_api',
-                    'optimizations_applied': self.current_operation.optimization_applied if self.current_operation else []
+                    "accounts": accounts,
+                    "total_count": len(accounts),
+                    "discovery_method": "optimized_organizations_api",
+                    "optimizations_applied": self.current_operation.optimization_applied
+                    if self.current_operation
+                    else [],
                 }
-
+
                 self.cache.set(cache_key, result)
-
+
                 return result
-
+
             except Exception as e:
                 logger.error(f"Optimized organization discovery failed: {e}")
                 raise
@@ -679,23 +690,23 @@
     def _process_accounts_parallel(self, paginator, org_client, batch_size: int) -> List[Dict]:
         """Process accounts in parallel with memory optimization"""
         all_accounts = []
-
+
         # Collect all account IDs first (memory efficient)
         account_ids = []
         for page in paginator.paginate():
-            account_ids.extend([acc['Id'] for acc in page['Accounts']])
-            all_accounts.extend(page['Accounts'])  # Store basic account info
-
+            account_ids.extend([acc["Id"] for acc in page["Accounts"]])
+            all_accounts.extend(page["Accounts"])  # Store basic account info
+
         if self.current_operation:
             self.current_operation.api_calls_made += len(account_ids) // 100 + 1  # Estimate pages
-
+
         # Process account tags in batches to avoid memory issues
         if len(account_ids) > batch_size:
             if self.current_operation:
                 self.current_operation.optimization_applied.append("batch_processing")
-
+
             self._enrich_accounts_with_tags_batched(all_accounts, org_client, batch_size)
-
+
         return all_accounts
 
     def _enrich_accounts_with_tags_batched(self, accounts: List[Dict], org_client, batch_size: int):
@@ -703,25 +714,25 @@
         with ThreadPoolExecutor(max_workers=min(self.max_workers, 10)) as executor:
             # Process in batches to control memory usage
             for i in range(0, len(accounts), batch_size):
-                batch = accounts[i:i + batch_size]
-
+                batch = accounts[i : i + batch_size]
+
                 # Submit batch for parallel tag processing
                 futures = []
                 for account in batch:
-                    future = executor.submit(self._get_account_tags_safe, org_client, account['Id'])
+                    future = executor.submit(self._get_account_tags_safe, org_client, account["Id"])
                     futures.append((future, account))
-
+
                 # Collect results for this batch
                 for future, account in futures:
                     try:
                         tags = future.result(timeout=10)  # 10 second timeout per account
-                        account['Tags'] = tags
+                        account["Tags"] = tags
                         if self.current_operation:
                             self.current_operation.api_calls_made += 1
                     except Exception as e:
                         logger.debug(f"Failed to get tags for account {account['Id']}: {e}")
-                        account['Tags'] = {}
-
+                        account["Tags"] = {}
+
                 # Trigger garbage collection after each batch
                 gc.collect()
 
@@ -729,112 +740,109 @@
         """Safely get account tags with error handling"""
         try:
             response = org_client.list_tags_for_resource(ResourceId=account_id)
-            return {tag['Key']: tag['Value'] for tag in response['Tags']}
+            return {tag["Key"]: tag["Value"] for tag in response["Tags"]}
         except Exception:
             return {}
 
     def optimize_vpc_analysis(self, operational_profile: str) -> Callable:
         """
         Optimize VPC analysis operations to address timeout issues
-
+
         Returns optimized function with:
         - Connection pooling for multiple regions
        - Parallel region processing
        - Intelligent timeout handling
        - Memory-efficient resource processing
        """
+
        def optimized_vpc_analysis(regions: List[str] = None):
            """Optimized VPC analysis with regional parallelization"""
            if regions is None:
-                regions = [
-                    'us-east-1', 'us-west-2', 'eu-west-1', 'eu-central-1',
-                    'ap-southeast-1', 'ap-northeast-1'
-                ]
-
+                regions = ["us-east-1", "us-west-2", "eu-west-1", "eu-central-1", "ap-southeast-1", "ap-northeast-1"]
+
            cache_key = f"vpc_analysis_{operational_profile}_{'_'.join(sorted(regions))}"
-
+
            # Check cache
            cached_result = self.cache.get(cache_key, ttl_minutes=60)  # Longer TTL for VPC data
            if cached_result and self.current_operation:
                self.current_operation.cache_hits += 1
                self.current_operation.optimization_applied.append("regional_caching")
                return cached_result
-
+
            if self.current_operation:
                self.current_operation.cache_misses += 1
                self.current_operation.optimization_applied.append("parallel_regional_processing")
-
+
            # Parallel regional analysis
            vpc_data = {}
-
+
            with Progress(
                SpinnerColumn(),
                TextColumn("[progress.description]{task.description}"),
                BarColumn(),
                MofNCompleteColumn(),
                TimeElapsedColumn(),
-                console=console
+                console=console,
            ) as progress:
-
                task = progress.add_task("Analyzing VPCs across regions...", total=len(regions))
-
+
                with ThreadPoolExecutor(max_workers=min(self.max_workers, len(regions))) as executor:
                    # Submit region analysis tasks
                    future_to_region = {
-                        executor.submit(self._analyze_vpc_region, operational_profile, region): region
+                        executor.submit(self._analyze_vpc_region, operational_profile, region): region
                        for region in regions
                    }
-
+
                    for future in as_completed(future_to_region):
                        region = future_to_region[future]
                        try:
                            region_data = future.result(timeout=45)  # 45s timeout per region
                            vpc_data[region] = region_data
-
+
                            if self.current_operation:
-                                self.current_operation.api_calls_made += region_data.get('api_calls', 0)
-
+                                self.current_operation.api_calls_made += region_data.get("api_calls", 0)
+
                        except Exception as e:
                            logger.warning(f"VPC analysis failed for region {region}: {e}")
-                            vpc_data[region] = {'error': str(e), 'vpcs': []}
-
+                            vpc_data[region] = {"error": str(e), "vpcs": []}
+
                        finally:
                            progress.advance(task)
-
+
            # Aggregate results
            result = {
-                'vpc_data_by_region': vpc_data,
-                'total_vpcs': sum(len(data.get('vpcs', [])) for data in vpc_data.values()),
-                'regions_analyzed': len(regions),
-                'optimization_applied': self.current_operation.optimization_applied if self.current_operation else []
+                "vpc_data_by_region": vpc_data,
+                "total_vpcs": sum(len(data.get("vpcs", [])) for data in vpc_data.values()),
+                "regions_analyzed": len(regions),
+                "optimization_applied": self.current_operation.optimization_applied if self.current_operation else [],
            }
-
+
            # Cache result
            self.cache.set(cache_key, result)
-
+
            return result
 
        return optimized_vpc_analysis
 
-    def optimize_multi_account_operations(self,
-                                          account_list: List[str],
-                                          operation_function: Callable,
-                                          batch_size: Optional[int] = None) -> Callable:
+    def optimize_multi_account_operations(
+        self, account_list: List[str], operation_function: Callable, batch_size: Optional[int] = None
+    ) -> Callable:
        """
        Phase 2: Optimize multi-account operations for 200+ enterprise account scaling
-
+
        Args:
            account_list: List of AWS account IDs to process
            operation_function: Function to execute per account
            batch_size: Adaptive batch size (auto-calculated if None)
-
+
        Returns:
            Optimized function with enterprise scaling patterns
        """
+
        def optimized_multi_account_operation(**kwargs):
            """Optimized multi-account operation with adaptive scaling"""
            account_count = len(account_list)
-
+
            # Phase 2: Adaptive batch sizing based on account count and memory
            if batch_size is None:
                if account_count <= 50:
@@ -847,28 +855,30 @@
                     calculated_batch_size = 25  # Enterprise scale 200+
             else:
                 calculated_batch_size = batch_size
-
+
             # Adjust batch size based on current memory usage
             if self.memory_optimization_active:
                 current_memory = self.process.memory_info().rss / (1024 * 1024)
                 memory_utilization = current_memory / self.memory_limit_mb
-
+
                 if memory_utilization > 0.7:
                     calculated_batch_size = max(5, calculated_batch_size // 2)
-                    console.log(f"[yellow]📉 Reducing batch size to {calculated_batch_size} due to memory pressure[/yellow]")
-
-            console.log(f"[cyan]🏢 Enterprise multi-account operation: {account_count} accounts, batch size: {calculated_batch_size}[/cyan]")
-
+                    console.log(
+                        f"[yellow]📉 Reducing batch size to {calculated_batch_size} due to memory pressure[/yellow]"
+                    )
+
+            console.log(
+                f"[cyan]🏢 Enterprise multi-account operation: {account_count} accounts, batch size: {calculated_batch_size}[/cyan]"
+            )
+
             if self.current_operation:
-                self.current_operation.optimization_applied.extend([
-                    "enterprise_multi_account_scaling",
-                    "adaptive_batch_sizing",
-                    f"batch_size_{calculated_batch_size}"
-                ])
-
+                self.current_operation.optimization_applied.extend(
+                    ["enterprise_multi_account_scaling", "adaptive_batch_sizing", f"batch_size_{calculated_batch_size}"]
+                )
+
             results = {}
             processed_count = 0
-
+
             # Process accounts in adaptive batches
             with Progress(
                 SpinnerColumn(),
@@ -876,83 +886,84 @@
                 BarColumn(),
                 MofNCompleteColumn(),
                 TimeElapsedColumn(),
-                console=console
+                console=console,
             ) as progress:
-
                 task = progress.add_task("Processing enterprise accounts...", total=account_count)
-
+
                 # Process in batches with circuit breaker protection
                 for i in range(0, account_count, calculated_batch_size):
-                    batch_accounts = account_list[i:i + calculated_batch_size]
-
+                    batch_accounts = account_list[i : i + calculated_batch_size]
+
                     with ThreadPoolExecutor(max_workers=min(self.max_workers, len(batch_accounts))) as executor:
                         batch_futures = {}
-
+
                         for account_id in batch_accounts:
                             # Use circuit breaker protection for each account
                             client_key = f"account_{account_id}"
-
+
                             try:
                                 future = executor.submit(
                                     self._protected_account_operation,
                                     client_key,
                                     operation_function,
                                     account_id,
-                                    **kwargs
+                                    **kwargs,
                                 )
                                 batch_futures[future] = account_id
-
+
                             except Exception as e:
                                 logger.warning(f"Failed to submit operation for account {account_id}: {e}")
                                 results[account_id] = {"error": str(e), "success": False}
-
+
                         # Collect batch results with timeout handling
                         for future in as_completed(batch_futures, timeout=120):  # 2 minute timeout per batch
                             account_id = batch_futures[future]
                             try:
                                 result = future.result(timeout=60)  # 1 minute per account
                                 results[account_id] = result
-
+
                             except Exception as e:
                                 logger.warning(f"Account operation failed for {account_id}: {e}")
                                 results[account_id] = {"error": str(e), "success": False}
-
+
                             finally:
                                 processed_count += 1
                                 progress.advance(task)
-
+
                     # Phase 2: Proactive memory cleanup between batches
                     if self.auto_memory_cleanup and i > 0:
                         current_memory = self.process.memory_info().rss / (1024 * 1024)
                         if current_memory > self.memory_limit_mb * 0.6:
                             self._proactive_memory_cleanup()
                             time.sleep(1)  # Brief pause after cleanup
-
+
             # Update operation metrics
             if self.current_operation:
                 self.current_operation.api_calls_made += processed_count
                 success_count = sum(1 for r in results.values() if r.get("success", False))
                 success_rate = (success_count / processed_count * 100) if processed_count > 0 else 0
-
-                console.log(f"[green]✅ Multi-account operation completed: {success_count}/{processed_count} accounts ({success_rate:.1f}% success)[/green]")
-
+
+                console.log(
+                    f"[green]✅ Multi-account operation completed: {success_count}/{processed_count} accounts ({success_rate:.1f}% success)[/green]"
+                )
+
                 if success_rate >= 99.0:
                     self.current_operation.optimization_applied.append("high_reliability_achieved")
-
+
             return {
-                'results': results,
-                'total_accounts': account_count,
-                'processed_accounts': processed_count,
-                'success_rate': success_rate,
-                'batch_size_used': calculated_batch_size,
-                'optimization_summary': {
-                    'enterprise_scaling': True,
-                    'adaptive_batching': True,
-                    'memory_optimized': self.memory_optimization_active,
-                    'reliability_protected': True
-                }
+                "results": results,
+                "total_accounts": account_count,
+                "processed_accounts": processed_count,
+                "success_rate": success_rate,
+                "batch_size_used": calculated_batch_size,
+                "optimization_summary": {
+                    "enterprise_scaling": True,
+                    "adaptive_batching": True,
+                    "memory_optimized": self.memory_optimization_active,
+                    "reliability_protected": True,
+                },
             }
-
+
         return optimized_multi_account_operation
 
     def _protected_account_operation(self, client_key: str, operation_function: Callable, account_id: str, **kwargs):
@@ -962,57 +973,53 @@
             self.client_pool.circuit_breakers[client_key] = CircuitBreaker(
                 failure_threshold=2,  # More aggressive for account-level operations
                 recovery_timeout_seconds=15,  # Faster recovery for account operations
-                success_threshold=1  # Close quickly on success
+                success_threshold=1,  # Close quickly on success
             )
-
+
         with self.client_pool.circuit_breakers[client_key].protected_call(f"account_{account_id}"):
             return operation_function(account_id=account_id, **kwargs)
 
     def _analyze_vpc_region(self, profile: str, region: str) -> Dict:
         """Analyze VPCs in a specific region with optimization"""
         try:
-            ec2_client = self.client_pool.get_client('ec2', profile, region)
-
+            ec2_client = self.client_pool.get_client("ec2", profile, region)
+
             # Get VPCs with pagination
             vpcs = []
             api_calls = 0
-
-            paginator = ec2_client.get_paginator('describe_vpcs')
+
+            paginator = ec2_client.get_paginator("describe_vpcs")
             for page in paginator.paginate():
-                vpcs.extend(page['Vpcs'])
+                vpcs.extend(page["Vpcs"])
                 api_calls += 1
-
+
             # Enrich with network details (optimized)
             for vpc in vpcs:
                 # Get subnets for this VPC
                 try:
                     subnets_response = ec2_client.describe_subnets(
-                        Filters=[{'Name': 'vpc-id', 'Values': [vpc['VpcId']]}]
+                        Filters=[{"Name": "vpc-id", "Values": [vpc["VpcId"]]}]
                     )
-                    vpc['Subnets'] = subnets_response['Subnets']
+                    vpc["Subnets"] = subnets_response["Subnets"]
                     api_calls += 1
                 except Exception as e:
                     logger.debug(f"Failed to get subnets for VPC {vpc['VpcId']}: {e}")
-                    vpc['Subnets'] = []
-
-            return {
-                'vpcs': vpcs,
-                'region': region,
-                'api_calls': api_calls
-            }
-
+                    vpc["Subnets"] = []
+
+            return {"vpcs": vpcs, "region": region, "api_calls": api_calls}
+
         except Exception as e:
             logger.error(f"VPC region analysis failed for {region}: {e}")
-            return {'vpcs': [], 'region': region, 'error': str(e), 'api_calls': 0}
+            return {"vpcs": [], "region": region, "error": str(e), "api_calls": 0}
 
     def create_optimization_summary(self) -> None:
         """Create comprehensive optimization performance summary with Phase 2 reliability metrics"""
         if not self.metrics:
             console.print("[yellow]No optimization metrics available yet[/]")
             return
-
+
         print_header("Performance Optimization Summary - Phase 2 Enhanced", "SRE Automation Engine")
-
+
         # Phase 2: Create enhanced metrics table with reliability information
         table = create_table(
             title="Phase 2 Optimization Results",
@@ -1023,23 +1030,23 @@
             {"name": "Memory", "style": "blue", "justify": "right"},
             {"name": "Improvement", "style": "white", "justify": "right"},
             {"name": "Optimizations", "style": "dim", "justify": "left", "max_width": 25},
-            {"name": "Status", "style": "white", "justify": "center"}
-        ]
+            {"name": "Status", "style": "white", "justify": "center"},
+        ],
         )
-
+
         for metrics in self.metrics:
             improvement = metrics.get_performance_improvement()
-            status_icon = STATUS_INDICATORS['success'] if metrics.success else STATUS_INDICATORS['error']
-            status_color = 'green' if metrics.success else 'red'
-
+            status_icon = STATUS_INDICATORS["success"] if metrics.success else STATUS_INDICATORS["error"]
+            status_color = "green" if metrics.success else "red"
+
             improvement_text = f"+{improvement:.1f}%" if improvement > 0 else f"{improvement:.1f}%"
-            improvement_color = 'green' if improvement > 0 else 'yellow'
-
+            improvement_color = "green" if improvement > 0 else "yellow"
+
             # Phase 2: Memory usage display with color coding
             memory_mb = metrics.memory_peak_mb
-            memory_color = 'green' if memory_mb <= 256 else 'yellow' if memory_mb <= 512 else 'red'
+            memory_color = "green" if memory_mb <= 256 else "yellow" if memory_mb <= 512 else "red"
             memory_text = f"[{memory_color}]{memory_mb:.0f}MB[/{memory_color}]"
-
+
             table.add_row(
                 metrics.operation_name,
                 f"{metrics.duration_seconds:.1f}s",
@@ -1047,29 +1054,27 @@
                 memory_text,
                 f"[{improvement_color}]{improvement_text}[/]",
                 ", ".join(metrics.optimization_applied[:2]) + ("..." if len(metrics.optimization_applied) > 2 else ""),
-                f"[{status_color}]{status_icon}[/]"
+                f"[{status_color}]{status_icon}[/]",
             )
-
+
         console.print(table)
-
+
         # Cache statistics
         cache_stats = self.cache.get_stats()
         cache_panel = Panel(
             f"[cyan]Cache Size:[/] {cache_stats['size']}/{cache_stats['max_size']}\n"
             f"[cyan]Hit Rate:[/] {cache_stats['hit_rate']:.1f}% ({cache_stats['hits']} hits, {cache_stats['misses']} misses)",
             title="[bold]Cache Performance[/bold]",
-            border_style="blue"
+            border_style="blue",
         )
         console.print(cache_panel)
-
+
         # Phase 2: Reliability status panel
         reliability_stats = self.client_pool.get_reliability_status()
-        reliability_color = {
-            'excellent': 'green',
-            'good': 'blue',
-            'needs_improvement': 'yellow'
-        }.get(reliability_stats.get('reliability_status', 'good'), 'white')
-
+        reliability_color = {"excellent": "green", "good": "blue", "needs_improvement": "yellow"}.get(
+            reliability_stats.get("reliability_status", "good"), "white"
+        )
+
         reliability_panel = Panel(
             f"[cyan]Success Rate:[/] [{reliability_color}]{reliability_stats['overall_success_rate']:.2f}%[/{reliability_color}] "
             f"(Target: {reliability_stats['target_success_rate']}%)\n"
@@ -1078,33 +1083,39 @@
             f"[cyan]Circuit Breakers:[/] {len(reliability_stats['circuit_breakers'])} active "
             f"([cyan]Status:[/] [{reliability_color}]{reliability_stats['reliability_status'].title()}[/{reliability_color}])",
             title="[bold]Phase 2 Reliability Metrics[/bold]",
-            border_style=reliability_color
+            border_style=reliability_color,
         )
         console.print(reliability_panel)
-
+
         # Phase 2: Memory optimization status
         memory_report = self.get_memory_usage_report()
-        memory_color = 'green' if memory_report['current_memory_mb'] <= 256 else 'yellow' if memory_report['current_memory_mb'] <= 512 else 'red'
-
+        memory_color = (
+            "green"
+            if memory_report["current_memory_mb"] <= 256
+            else "yellow"
+            if memory_report["current_memory_mb"] <= 512
+            else "red"
+        )
+
         memory_panel = Panel(
             f"[cyan]Current Memory:[/] [{memory_color}]{memory_report['current_memory_mb']:.1f}MB[/{memory_color}] / {self.memory_limit_mb}MB\n"
             f"[cyan]Peak Memory:[/] {memory_report.get('peak_memory_mb', 0):.1f}MB\n"
             f"[cyan]Status:[/] [{memory_color}]{memory_report['memory_efficiency'].title()}[/{memory_color}] "
             f"([cyan]Cleanup:[/] {'Enabled' if self.auto_memory_cleanup else 'Disabled'})",
             title="[bold]Phase 2 Memory Optimization[/bold]",
-            border_style=memory_color
+            border_style=memory_color,
         )
         console.print(memory_panel)
 
     def get_memory_usage_report(self) -> Dict[str, Any]:
         """Get current memory usage report"""
         memory_info = self.process.memory_info()
-
+
         return {
             "current_memory_mb": memory_info.rss / (1024 * 1024),
             "peak_memory_mb": max(m.memory_peak_mb for m in self.metrics) if self.metrics else 0.0,
             "memory_limit_mb": self.memory_limit_mb,
-            "memory_efficiency": "good" if memory_info.rss / (1024 * 1024) < self.memory_limit_mb * 0.8 else "warning"
+            "memory_efficiency": "good" if memory_info.rss / (1024 * 1024) < self.memory_limit_mb * 0.8 else "warning",
         }
 
     def clear_caches(self):
@@ -1118,16 +1129,14 @@
 _optimization_engine: Optional[PerformanceOptimizationEngine] = None
 
 
-def get_optimization_engine(max_workers: int = 20,
-                            cache_ttl_minutes: int = 30,
-                            memory_limit_mb: int = 512) -> PerformanceOptimizationEngine:  # Phase 2: Default 512MB
+def get_optimization_engine(
+    max_workers: int = 20, cache_ttl_minutes: int = 30, memory_limit_mb: int = 512
+) -> PerformanceOptimizationEngine:  # Phase 2: Default 512MB
     """Get or create global performance optimization engine with Phase 2 enhancements"""
     global _optimization_engine
     if _optimization_engine is None:
         _optimization_engine = PerformanceOptimizationEngine(
-            max_workers=max_workers,
-            cache_ttl_minutes=cache_ttl_minutes,
-            memory_limit_mb=memory_limit_mb
+            max_workers=max_workers, cache_ttl_minutes=cache_ttl_minutes, memory_limit_mb=memory_limit_mb
         )
     return _optimization_engine
 
@@ -1143,11 +1152,11 @@ def create_optimization_report():
 # Export public interface - Phase 2 Enhanced
 __all__ = [
     "PerformanceOptimizationEngine",
-    "OptimizationMetrics",
+    "OptimizationMetrics",
     "IntelligentCache",
     "OptimizedAWSClientPool",
     "CircuitBreaker",
     "CircuitBreakerState",
     "get_optimization_engine",
-    "create_optimization_report"
-]
+    "create_optimization_report",
+]