runbooks 0.7.9__py3-none-any.whl → 0.9.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- runbooks/__init__.py +1 -1
- runbooks/cfat/README.md +12 -1
- runbooks/cfat/__init__.py +1 -1
- runbooks/cfat/assessment/compliance.py +4 -1
- runbooks/cfat/assessment/runner.py +42 -34
- runbooks/cfat/models.py +1 -1
- runbooks/cloudops/__init__.py +123 -0
- runbooks/cloudops/base.py +385 -0
- runbooks/cloudops/cost_optimizer.py +811 -0
- runbooks/cloudops/infrastructure_optimizer.py +29 -0
- runbooks/cloudops/interfaces.py +828 -0
- runbooks/cloudops/lifecycle_manager.py +29 -0
- runbooks/cloudops/mcp_cost_validation.py +678 -0
- runbooks/cloudops/models.py +251 -0
- runbooks/cloudops/monitoring_automation.py +29 -0
- runbooks/cloudops/notebook_framework.py +676 -0
- runbooks/cloudops/security_enforcer.py +449 -0
- runbooks/common/__init__.py +152 -0
- runbooks/common/accuracy_validator.py +1039 -0
- runbooks/common/context_logger.py +440 -0
- runbooks/common/cross_module_integration.py +594 -0
- runbooks/common/enhanced_exception_handler.py +1108 -0
- runbooks/common/enterprise_audit_integration.py +634 -0
- runbooks/common/mcp_cost_explorer_integration.py +900 -0
- runbooks/common/mcp_integration.py +548 -0
- runbooks/common/performance_monitor.py +387 -0
- runbooks/common/profile_utils.py +216 -0
- runbooks/common/rich_utils.py +172 -1
- runbooks/feedback/user_feedback_collector.py +440 -0
- runbooks/finops/README.md +377 -458
- runbooks/finops/__init__.py +4 -21
- runbooks/finops/account_resolver.py +279 -0
- runbooks/finops/accuracy_cross_validator.py +638 -0
- runbooks/finops/aws_client.py +721 -36
- runbooks/finops/budget_integration.py +313 -0
- runbooks/finops/cli.py +59 -5
- runbooks/finops/cost_optimizer.py +1340 -0
- runbooks/finops/cost_processor.py +211 -37
- runbooks/finops/dashboard_router.py +900 -0
- runbooks/finops/dashboard_runner.py +990 -232
- runbooks/finops/embedded_mcp_validator.py +288 -0
- runbooks/finops/enhanced_dashboard_runner.py +8 -7
- runbooks/finops/enhanced_progress.py +327 -0
- runbooks/finops/enhanced_trend_visualization.py +423 -0
- runbooks/finops/finops_dashboard.py +184 -1829
- runbooks/finops/helpers.py +509 -196
- runbooks/finops/iam_guidance.py +400 -0
- runbooks/finops/markdown_exporter.py +466 -0
- runbooks/finops/multi_dashboard.py +1502 -0
- runbooks/finops/optimizer.py +15 -15
- runbooks/finops/profile_processor.py +2 -2
- runbooks/finops/runbooks.inventory.organizations_discovery.log +0 -0
- runbooks/finops/runbooks.security.report_generator.log +0 -0
- runbooks/finops/runbooks.security.run_script.log +0 -0
- runbooks/finops/runbooks.security.security_export.log +0 -0
- runbooks/finops/schemas.py +589 -0
- runbooks/finops/service_mapping.py +195 -0
- runbooks/finops/single_dashboard.py +710 -0
- runbooks/finops/tests/test_reference_images_validation.py +1 -1
- runbooks/inventory/README.md +12 -1
- runbooks/inventory/core/collector.py +157 -29
- runbooks/inventory/list_ec2_instances.py +9 -6
- runbooks/inventory/list_ssm_parameters.py +10 -10
- runbooks/inventory/organizations_discovery.py +210 -164
- runbooks/inventory/rich_inventory_display.py +74 -107
- runbooks/inventory/run_on_multi_accounts.py +13 -13
- runbooks/inventory/runbooks.inventory.organizations_discovery.log +0 -0
- runbooks/inventory/runbooks.security.security_export.log +0 -0
- runbooks/main.py +1371 -240
- runbooks/metrics/dora_metrics_engine.py +711 -17
- runbooks/monitoring/performance_monitor.py +433 -0
- runbooks/operate/README.md +394 -0
- runbooks/operate/base.py +215 -47
- runbooks/operate/ec2_operations.py +435 -5
- runbooks/operate/iam_operations.py +598 -3
- runbooks/operate/privatelink_operations.py +1 -1
- runbooks/operate/rds_operations.py +508 -0
- runbooks/operate/s3_operations.py +508 -0
- runbooks/operate/vpc_endpoints.py +1 -1
- runbooks/remediation/README.md +489 -13
- runbooks/remediation/base.py +5 -3
- runbooks/remediation/commons.py +8 -4
- runbooks/security/ENTERPRISE_SECURITY_FRAMEWORK.md +506 -0
- runbooks/security/README.md +12 -1
- runbooks/security/__init__.py +265 -33
- runbooks/security/cloudops_automation_security_validator.py +1164 -0
- runbooks/security/compliance_automation.py +12 -10
- runbooks/security/compliance_automation_engine.py +1021 -0
- runbooks/security/enterprise_security_framework.py +930 -0
- runbooks/security/enterprise_security_policies.json +293 -0
- runbooks/security/executive_security_dashboard.py +1247 -0
- runbooks/security/integration_test_enterprise_security.py +879 -0
- runbooks/security/module_security_integrator.py +641 -0
- runbooks/security/multi_account_security_controls.py +2254 -0
- runbooks/security/real_time_security_monitor.py +1196 -0
- runbooks/security/report_generator.py +1 -1
- runbooks/security/run_script.py +4 -8
- runbooks/security/security_baseline_tester.py +39 -52
- runbooks/security/security_export.py +99 -120
- runbooks/sre/README.md +472 -0
- runbooks/sre/__init__.py +33 -0
- runbooks/sre/mcp_reliability_engine.py +1049 -0
- runbooks/sre/performance_optimization_engine.py +1032 -0
- runbooks/sre/production_monitoring_framework.py +584 -0
- runbooks/sre/reliability_monitoring_framework.py +1011 -0
- runbooks/validation/__init__.py +2 -2
- runbooks/validation/benchmark.py +154 -149
- runbooks/validation/cli.py +159 -147
- runbooks/validation/mcp_validator.py +291 -248
- runbooks/vpc/README.md +478 -0
- runbooks/vpc/__init__.py +2 -2
- runbooks/vpc/manager_interface.py +366 -351
- runbooks/vpc/networking_wrapper.py +68 -36
- runbooks/vpc/rich_formatters.py +22 -8
- runbooks-0.9.1.dist-info/METADATA +308 -0
- {runbooks-0.7.9.dist-info → runbooks-0.9.1.dist-info}/RECORD +120 -59
- {runbooks-0.7.9.dist-info → runbooks-0.9.1.dist-info}/entry_points.txt +1 -1
- runbooks/finops/cross_validation.py +0 -375
- runbooks-0.7.9.dist-info/METADATA +0 -636
- {runbooks-0.7.9.dist-info → runbooks-0.9.1.dist-info}/WHEEL +0 -0
- {runbooks-0.7.9.dist-info → runbooks-0.9.1.dist-info}/licenses/LICENSE +0 -0
- {runbooks-0.7.9.dist-info → runbooks-0.9.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1108 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
Enhanced Exception Handler - Enterprise Error Management Framework
|
4
|
+
================================================================
|
5
|
+
|
6
|
+
STRATEGIC CONTEXT: Phase 2 rollout extending proven FinOps error handling patterns
|
7
|
+
to provide comprehensive, user-friendly error management across all CloudOps modules.
|
8
|
+
|
9
|
+
This module provides enterprise-grade exception handling with:
|
10
|
+
- Rich CLI formatted error messages with actionable solutions
|
11
|
+
- IAM guidance with profile recommendations
|
12
|
+
- Graceful degradation with recovery paths
|
13
|
+
- Context-aware error resolution
|
14
|
+
- Comprehensive audit trails
|
15
|
+
- Performance-aware error handling
|
16
|
+
|
17
|
+
Features:
|
18
|
+
- AWS service-specific error handling and guidance
|
19
|
+
- Profile-based IAM error resolution
|
20
|
+
- Multi-language error support (EN/JP/KR/VN)
|
21
|
+
- Rich CLI visual error formatting
|
22
|
+
- Automated retry mechanisms with backoff
|
23
|
+
- Error recovery workflows
|
24
|
+
- Comprehensive logging and audit trails
|
25
|
+
|
26
|
+
Author: QA Testing Specialist - CloudOps Automation Testing Expert
|
27
|
+
Version: Phase 2 Implementation
|
28
|
+
"""
|
29
|
+
|
30
|
+
import json
|
31
|
+
import logging
|
32
|
+
import time
|
33
|
+
import traceback
|
34
|
+
from dataclasses import dataclass, field
|
35
|
+
from datetime import datetime
|
36
|
+
from enum import Enum
|
37
|
+
from pathlib import Path
|
38
|
+
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
|
39
|
+
|
40
|
+
import boto3
|
41
|
+
from botocore.exceptions import (
|
42
|
+
BotoCoreError,
|
43
|
+
ClientError,
|
44
|
+
EndpointConnectionError,
|
45
|
+
NoCredentialsError,
|
46
|
+
PartialCredentialsError,
|
47
|
+
ProfileNotFound,
|
48
|
+
TokenRetrievalError,
|
49
|
+
)
|
50
|
+
from botocore.exceptions import (
|
51
|
+
ConnectionError as BotoConnectionError,
|
52
|
+
)
|
53
|
+
|
54
|
+
from ..common.rich_utils import (
|
55
|
+
STATUS_INDICATORS,
|
56
|
+
console,
|
57
|
+
create_panel,
|
58
|
+
create_table,
|
59
|
+
create_tree,
|
60
|
+
format_cost,
|
61
|
+
print_error,
|
62
|
+
print_info,
|
63
|
+
print_status,
|
64
|
+
print_success,
|
65
|
+
print_warning,
|
66
|
+
)
|
67
|
+
|
68
|
+
|
69
|
+
class ErrorSeverity(Enum):
    """Severity labels used when classifying an error.

    Members are declared from most to least urgent and each value is the
    member's own name as a string.
    """

    # System failure, immediate attention required
    CRITICAL = "CRITICAL"
    # Major functionality impacted
    HIGH = "HIGH"
    # Moderate impact, workaround available
    MEDIUM = "MEDIUM"
    # Minor issue, minimal impact
    LOW = "LOW"
    # Informational, no action required
    INFO = "INFO"
|
77
|
+
|
78
|
+
|
79
|
+
class ErrorCategory(Enum):
    """Buckets an error is sorted into before a resolution is chosen.

    Each value is the member's own name as a string.
    """

    # AWS-related failures
    AWS_CREDENTIALS = "AWS_CREDENTIALS"
    AWS_PERMISSIONS = "AWS_PERMISSIONS"
    AWS_SERVICE = "AWS_SERVICE"
    AWS_THROTTLING = "AWS_THROTTLING"

    # Environment-related failures
    NETWORK = "NETWORK"
    CONFIGURATION = "CONFIGURATION"

    # Application-level failures
    DATA_VALIDATION = "DATA_VALIDATION"
    PERFORMANCE = "PERFORMANCE"
    BUSINESS_LOGIC = "BUSINESS_LOGIC"

    # Anything that matches none of the above
    UNKNOWN = "UNKNOWN"
|
92
|
+
|
93
|
+
|
94
|
+
@dataclass
class ErrorContext:
    """Describes where and when an error happened.

    Only ``module_name`` and ``operation`` are required; every other field
    has a default.
    """

    # Required identification of the failing call site
    module_name: str
    operation: str

    # Optional AWS session details
    aws_profile: Optional[str] = None
    aws_region: Optional[str] = None

    # Free-form context dictionaries; each instance gets its own empty dict
    user_context: Dict[str, Any] = field(default_factory=dict)
    system_context: Dict[str, Any] = field(default_factory=dict)
    performance_context: Dict[str, Any] = field(default_factory=dict)

    # ISO-8601 string of the local (naive) time at which the instance was created
    timestamp: str = field(default_factory=lambda: datetime.now().isoformat())
|
106
|
+
|
107
|
+
|
108
|
+
@dataclass
class ErrorResolution:
    """Guidance shown to the user for resolving an error.

    ``title``, ``description`` and ``action_items`` are required; the
    remaining fields have defaults.
    """

    # Headline and explanation of the problem
    title: str
    description: str

    # Ordered steps the user should take
    action_items: List[str]

    # Optional commands and alternative routes; fresh empty lists per instance
    recovery_commands: List[str] = field(default_factory=list)
    alternative_approaches: List[str] = field(default_factory=list)

    # Human-readable effort estimate
    estimated_resolution_time: str = "5-10 minutes"

    # Whether the fix needs administrator involvement
    requires_admin: bool = False

    # Optional reference links; fresh empty list per instance
    documentation_links: List[str] = field(default_factory=list)
|
120
|
+
|
121
|
+
|
122
|
+
@dataclass
class EnhancedError:
    """Bundles an exception with its classification, context and resolution."""

    # The exception being described
    original_exception: Exception

    # Classification
    severity: ErrorSeverity
    category: ErrorCategory

    # Where it happened and how to resolve it
    context: ErrorContext
    resolution: ErrorResolution

    # Identifier string for this error
    error_code: str

    # Retry settings
    retry_possible: bool = False
    max_retries: int = 3
    backoff_seconds: float = 1.0

    # Chronological list of audit records; fresh empty list per instance
    audit_trail: List[Dict[str, Any]] = field(default_factory=list)
|
136
|
+
|
137
|
+
|
138
|
+
class EnterpriseExceptionHandler:
|
139
|
+
"""
|
140
|
+
Enterprise exception handler providing comprehensive error management.
|
141
|
+
|
142
|
+
This handler extends proven FinOps error handling patterns to provide:
|
143
|
+
- Context-aware error analysis and resolution
|
144
|
+
- Rich CLI formatted error messages
|
145
|
+
- AWS-specific error guidance with profile recommendations
|
146
|
+
- Automated retry mechanisms
|
147
|
+
- Comprehensive audit trails
|
148
|
+
- Multi-language support for enterprise environments
|
149
|
+
"""
|
150
|
+
|
151
|
+
def __init__(
    self,
    module_name: str,
    enable_auto_retry: bool = True,
    enable_rich_output: bool = True,
    audit_file_path: Optional[str] = None,
):
    """
    Set up the handler for one CloudOps module.

    Args:
        module_name: Name of the module that owns this handler; used in the
            logger name and the default audit file name
        enable_auto_retry: Whether retryable errors trigger an automatic
            resolution attempt
        enable_rich_output: Whether errors are displayed with Rich formatting
        audit_file_path: Audit trail file; when empty or omitted it defaults to
            ``artifacts/audit/<module_name>_errors.json``
    """
    self.module_name = module_name
    self.enable_auto_retry = enable_auto_retry
    self.enable_rich_output = enable_rich_output

    if audit_file_path:
        self.audit_file_path = audit_file_path
    else:
        self.audit_file_path = f"artifacts/audit/{module_name}_errors.json"

    # Per-module logger
    self.logger = logging.getLogger(f"cloudops.{module_name}.exceptions")

    # One zeroed counter per category, plus resolution statistics
    self.error_counts = dict.fromkeys(ErrorCategory, 0)
    self.resolution_success_rate = {}

    # Lookup tables built by helper methods
    self.aws_error_mappings = self._initialize_aws_error_mappings()
    self.profile_recommendations = self._initialize_profile_recommendations()

    # Make sure the directory holding the audit file exists
    audit_directory = Path(self.audit_file_path).parent
    audit_directory.mkdir(parents=True, exist_ok=True)
|
187
|
+
|
188
|
+
def handle_exception(
    self, exception: Exception, context: ErrorContext, operation_data: Optional[Dict[str, Any]] = None
) -> EnhancedError:
    """
    Run the full handling pipeline for one exception.

    The exception is analyzed, counted under its category, optionally
    displayed, written to the audit trail and, when retryable and auto-retry
    is enabled, passed to the automatic resolution step.

    Args:
        exception: The exception to handle
        context: Context describing where the error occurred
        operation_data: Extra operation-specific data forwarded to the analysis

    Returns:
        The enhanced error produced by the analysis
    """
    analyzed = self._analyze_exception(exception, context, operation_data)

    # Statistics
    self.error_counts[analyzed.category] += 1

    # Rich display is optional
    if self.enable_rich_output:
        self._display_enhanced_error(analyzed)

    # Audit logging always happens
    self._log_error_to_audit_trail(analyzed)

    # Nothing more to do unless the error is retryable and auto-retry is on
    if not (analyzed.retry_possible and self.enable_auto_retry):
        return analyzed

    outcome = self._attempt_auto_resolution(analyzed)
    resolution_record = {
        "auto_resolution_attempted": True,
        "resolution_success": outcome,
        "timestamp": datetime.now().isoformat(),
    }
    analyzed.audit_trail.append(resolution_record)
    return analyzed
|
227
|
+
|
228
|
+
def handle_aws_error(self, error: ClientError, context: ErrorContext, aws_operation: str) -> EnhancedError:
    """
    Build an AWS-specific enhanced error and run it through the pipeline.

    The error code is read from ``response["Error"]["Code"]`` and the service
    name from the ``x-amzn-service`` HTTP response header; either falls back
    to ``"Unknown"`` when absent.

    Args:
        error: The AWS ClientError that was raised
        context: Context describing where the error occurred
        aws_operation: Name of the AWS operation that failed

    Returns:
        Enhanced error returned by ``handle_exception``
    """
    error_code = error.response.get("Error", {}).get("Code", "Unknown")
    response_headers = error.response.get("ResponseMetadata", {}).get("HTTPHeaders", {})
    service_name = response_headers.get("x-amzn-service", "Unknown")

    # Severity, category and retry settings come from the AWS-specific analysis
    analysis = self._analyze_aws_error(error, error_code, service_name, aws_operation)

    prepared_error = EnhancedError(
        original_exception=error,
        severity=analysis["severity"],
        category=analysis["category"],
        context=context,
        resolution=self._generate_aws_resolution(error, error_code, service_name, context),
        error_code=f"AWS_{service_name}_{error_code}",
        retry_possible=analysis["retry_possible"],
        max_retries=analysis.get("max_retries", 3),
        backoff_seconds=analysis.get("backoff_seconds", 2.0),
    )

    # The prepared error rides along under "enhanced_error" so the generic
    # analysis step returns it instead of classifying the exception again.
    operation_data = {
        "aws_service": service_name,
        "aws_operation": aws_operation,
        "error_code": error_code,
        "enhanced_error": prepared_error,
    }
    return self.handle_exception(prepared_error.original_exception, context, operation_data)
|
271
|
+
|
272
|
+
def handle_credentials_error(
    self, error: Union[NoCredentialsError, PartialCredentialsError, ProfileNotFound], context: ErrorContext
) -> EnhancedError:
    """
    Build a credentials-specific enhanced error and run it through the pipeline.

    Args:
        error: The credentials-related exception that was raised
        context: Context describing where the error occurred

    Returns:
        Enhanced error returned by ``handle_exception``; it is marked HIGH
        severity and not retryable
    """
    # Choose the resolution scenario; None stands for the missing-profile case
    if isinstance(error, NoCredentialsError):
        scenario = "no_credentials"
    elif isinstance(error, PartialCredentialsError):
        scenario = "partial_credentials"
    elif isinstance(error, ProfileNotFound):
        scenario = None
    else:
        scenario = "generic"

    if scenario is None:
        resolution = self._generate_profile_not_found_resolution(context)
    else:
        resolution = self._generate_credentials_resolution(context, scenario)

    prepared_error = EnhancedError(
        original_exception=error,
        severity=ErrorSeverity.HIGH,
        category=ErrorCategory.AWS_CREDENTIALS,
        context=context,
        resolution=resolution,
        error_code=f"CREDENTIALS_{type(error).__name__}",
        retry_possible=False,  # Manual intervention required
        max_retries=0,
    )

    operation_data = {"credentials_error_type": type(error).__name__, "enhanced_error": prepared_error}
    return self.handle_exception(prepared_error.original_exception, context, operation_data)
|
310
|
+
|
311
|
+
def handle_performance_error(
|
312
|
+
self, operation_name: str, execution_time: float, performance_target: float, context: ErrorContext
|
313
|
+
) -> Optional[EnhancedError]:
|
314
|
+
"""
|
315
|
+
Handle performance-related issues with optimization guidance.
|
316
|
+
|
317
|
+
Args:
|
318
|
+
operation_name: Name of the operation that exceeded performance targets
|
319
|
+
execution_time: Actual execution time
|
320
|
+
performance_target: Target execution time
|
321
|
+
context: Error context
|
322
|
+
|
323
|
+
Returns:
|
324
|
+
Enhanced error if performance target significantly exceeded, None otherwise
|
325
|
+
"""
|
326
|
+
performance_ratio = execution_time / performance_target
|
327
|
+
|
328
|
+
# Only create error if performance significantly exceeded (>150% of target)
|
329
|
+
if performance_ratio <= 1.5:
|
330
|
+
return None
|
331
|
+
|
332
|
+
severity = ErrorSeverity.MEDIUM if performance_ratio < 2.0 else ErrorSeverity.HIGH
|
333
|
+
|
334
|
+
resolution = ErrorResolution(
|
335
|
+
title=f"Performance Optimization Required: {operation_name}",
|
336
|
+
description=f"Operation took {execution_time:.1f}s, exceeding target of {performance_target:.1f}s by {((performance_ratio - 1) * 100):.1f}%",
|
337
|
+
action_items=[
|
338
|
+
f"Review {operation_name} operation for optimization opportunities",
|
339
|
+
"Check AWS API throttling and request patterns",
|
340
|
+
"Consider implementing parallel processing where applicable",
|
341
|
+
"Monitor resource usage during operation execution",
|
342
|
+
"Review AWS region selection for optimal performance",
|
343
|
+
],
|
344
|
+
recovery_commands=[
|
345
|
+
f"# Optimize {operation_name} operation",
|
346
|
+
f"runbooks {self.module_name} {operation_name} --parallel",
|
347
|
+
f"runbooks {self.module_name} {operation_name} --region us-east-1", # Closest region
|
348
|
+
f"runbooks {self.module_name} {operation_name} --batch-size 10",
|
349
|
+
],
|
350
|
+
estimated_resolution_time="15-30 minutes",
|
351
|
+
requires_admin=False,
|
352
|
+
)
|
353
|
+
|
354
|
+
# Create performance exception
|
355
|
+
performance_exception = Exception(
|
356
|
+
f"Performance target exceeded: {execution_time:.1f}s > {performance_target:.1f}s"
|
357
|
+
)
|
358
|
+
|
359
|
+
enhanced_error = EnhancedError(
|
360
|
+
original_exception=performance_exception,
|
361
|
+
severity=severity,
|
362
|
+
category=ErrorCategory.PERFORMANCE,
|
363
|
+
context=context,
|
364
|
+
resolution=resolution,
|
365
|
+
error_code=f"PERFORMANCE_{operation_name.upper()}_EXCEEDED",
|
366
|
+
retry_possible=True,
|
367
|
+
max_retries=2,
|
368
|
+
backoff_seconds=5.0,
|
369
|
+
)
|
370
|
+
|
371
|
+
return self.handle_exception(
|
372
|
+
enhanced_error.original_exception,
|
373
|
+
context,
|
374
|
+
{
|
375
|
+
"operation_name": operation_name,
|
376
|
+
"execution_time": execution_time,
|
377
|
+
"performance_target": performance_target,
|
378
|
+
"performance_ratio": performance_ratio,
|
379
|
+
"enhanced_error": enhanced_error,
|
380
|
+
},
|
381
|
+
)
|
382
|
+
|
383
|
+
def graceful_degradation(
    self,
    primary_operation: Callable,
    fallback_operations: List[Callable],
    context: ErrorContext,
    operation_args: Optional[Tuple] = None,
    operation_kwargs: Optional[Dict] = None,
) -> Tuple[Any, Optional[EnhancedError]]:
    """
    Execute operation with graceful degradation to fallback approaches.

    The primary operation is tried first, then each fallback in order, all
    with the same arguments. The first operation that returns without raising
    wins. Every failure is analyzed and written to the audit trail.

    Args:
        primary_operation: Primary operation to attempt
        fallback_operations: Fallback operations to try, in order; any
            iterable of callables, or None/empty for no fallbacks
        context: Error context
        operation_args: Positional arguments passed to every operation
        operation_kwargs: Keyword arguments passed to every operation

    Returns:
        Tuple of (result, None) on success, or (None, enhanced_error) built
        from the last failure when every operation raised
    """
    operation_args = operation_args or ()
    operation_kwargs = operation_kwargs or {}

    # Unpacking accepts tuples and other iterables, not only lists
    operations = [primary_operation, *(fallback_operations or [])]
    last_error = None

    for i, operation in enumerate(operations):
        # functools.partial objects and callable instances have no __name__.
        # Reading it directly raised AttributeError, and again inside the
        # except handler, so the failure escaped this method entirely.
        op_name = getattr(operation, "__name__", None) or repr(operation)
        is_fallback = i > 0

        if is_fallback:
            print_warning(f"⚠️ Attempting fallback {i}: {op_name}")
        else:
            print_info(f"🚀 Attempting primary operation: {op_name}")

        # Only the operation itself is guarded, so a failure in the status
        # output is not mistaken for a failure of the operation.
        try:
            result = operation(*operation_args, **operation_kwargs)
        except Exception as e:  # deliberate: any failure moves on to the next fallback
            last_error = e

            if is_fallback:
                print_error(f"❌ Fallback {i} failed: {op_name}")
            else:
                print_warning(f"⚠️ Primary operation failed: {op_name}")

            # Create enhanced error for logging
            enhanced_error = self._analyze_exception(
                e, context, {"operation_name": op_name, "attempt_number": i + 1, "is_fallback": is_fallback}
            )
            self._log_error_to_audit_trail(enhanced_error)
        else:
            if is_fallback:
                print_success(f"✅ Fallback operation succeeded: {op_name}")
            return result, None

    # All operations failed
    print_error("❌ All operations failed, including fallbacks")

    final_enhanced_error = self._analyze_exception(
        last_error,
        context,
        {"all_operations_failed": True, "operations_attempted": len(operations), "final_failure": True},
    )

    return None, final_enhanced_error
|
449
|
+
|
450
|
+
def create_error_recovery_workflow(self, enhanced_error: EnhancedError, interactive: bool = True) -> bool:
|
451
|
+
"""
|
452
|
+
Create interactive error recovery workflow.
|
453
|
+
|
454
|
+
Args:
|
455
|
+
enhanced_error: Enhanced error with resolution guidance
|
456
|
+
interactive: Enable interactive recovery prompts
|
457
|
+
|
458
|
+
Returns:
|
459
|
+
True if recovery was successful, False otherwise
|
460
|
+
"""
|
461
|
+
if not self.enable_rich_output:
|
462
|
+
return False
|
463
|
+
|
464
|
+
print_info("🔧 Starting error recovery workflow...")
|
465
|
+
|
466
|
+
# Display recovery options
|
467
|
+
recovery_table = create_table(
|
468
|
+
title=f"🛠️ Recovery Options for {enhanced_error.error_code}",
|
469
|
+
columns=[
|
470
|
+
{"name": "Step", "style": "cyan", "justify": "center"},
|
471
|
+
{"name": "Action", "style": "white", "justify": "left"},
|
472
|
+
{"name": "Required", "style": "yellow", "justify": "center"},
|
473
|
+
],
|
474
|
+
)
|
475
|
+
|
476
|
+
for i, action in enumerate(enhanced_error.resolution.action_items, 1):
|
477
|
+
required = "✅" if i <= 2 else "⚪" # First 2 actions are required
|
478
|
+
recovery_table.add_row(str(i), action, required)
|
479
|
+
|
480
|
+
console.print(recovery_table)
|
481
|
+
|
482
|
+
# Display recovery commands if available
|
483
|
+
if enhanced_error.resolution.recovery_commands:
|
484
|
+
commands_panel = create_panel(
|
485
|
+
"\n".join(enhanced_error.resolution.recovery_commands),
|
486
|
+
title="🔄 Recovery Commands",
|
487
|
+
border_style="green",
|
488
|
+
)
|
489
|
+
console.print(commands_panel)
|
490
|
+
|
491
|
+
# Interactive recovery if enabled
|
492
|
+
if interactive:
|
493
|
+
from ..common.rich_utils import confirm_action
|
494
|
+
|
495
|
+
if confirm_action("Would you like to proceed with automated recovery?", default=True):
|
496
|
+
return self._execute_automated_recovery(enhanced_error)
|
497
|
+
|
498
|
+
return False
|
499
|
+
|
500
|
+
def generate_error_report(self, time_period_hours: int = 24) -> Dict[str, Any]:
    """
    Assemble an error report from the handler's counters and helper analyses.

    Args:
        time_period_hours: Period recorded in the metadata and passed to the
            audit trail summary

    Returns:
        Dictionary with the keys ``report_metadata``, ``error_statistics``,
        ``top_error_patterns``, ``performance_impact``, ``recommendations``
        and ``audit_trail_summary``
    """
    metadata = {
        "module": self.module_name,
        "time_period_hours": time_period_hours,
        "report_timestamp": datetime.now().isoformat(),
        "handler_version": "Phase 2 Implementation",
    }

    statistics = {
        "total_errors": sum(self.error_counts.values()),
        # Copy so later counter updates do not alter an already built report
        "errors_by_category": dict(self.error_counts),
        "error_trends": self._calculate_error_trends(),
        "resolution_success_rate": self.resolution_success_rate,
    }

    report = {"report_metadata": metadata, "error_statistics": statistics}
    report["top_error_patterns"] = self._analyze_error_patterns()
    report["performance_impact"] = self._calculate_performance_impact()
    report["recommendations"] = self._generate_recommendations()
    report["audit_trail_summary"] = self._summarize_audit_trail(time_period_hours)
    return report
|
530
|
+
|
531
|
+
def display_error_report(self, report: Dict[str, Any]):
    """
    Display error report with Rich CLI formatting.

    Prints a statistics table with one row per category that has at least
    one error, followed by a recommendations panel when there are any.

    Args:
        report: Report dictionary as built by ``generate_error_report``; the
            keys of ``errors_by_category`` may be ErrorCategory members or
            plain strings
    """
    print_info("📊 Enterprise Error Analysis Report")

    # Error statistics table
    stats_table = create_table(
        title="🔍 Error Statistics",
        columns=[
            {"name": "Category", "style": "cyan", "justify": "left"},
            {"name": "Count", "style": "red", "justify": "right"},
            {"name": "Percentage", "style": "yellow", "justify": "right"},
        ],
    )

    statistics = report["error_statistics"]
    total_errors = statistics["total_errors"]
    for category, count in statistics["errors_by_category"].items():
        if count <= 0:
            continue

        # The counters are keyed by ErrorCategory members, which have no
        # str.replace(); use the member's value so the label can be formatted.
        label = str(category.value) if isinstance(category, Enum) else str(category)
        percentage = (count / total_errors) * 100 if total_errors > 0 else 0
        stats_table.add_row(label.replace("_", " ").title(), str(count), f"{percentage:.1f}%")

    console.print(stats_table)

    # Recommendations panel
    if report["recommendations"]:
        recommendations_text = "\n".join(f"• {rec}" for rec in report["recommendations"])
        recommendations_panel = create_panel(recommendations_text, title="💡 Recommendations", border_style="blue")
        console.print(recommendations_panel)
|
558
|
+
|
559
|
+
# Private methods for error analysis and handling
|
560
|
+
def _analyze_exception(
|
561
|
+
self, exception: Exception, context: ErrorContext, operation_data: Optional[Dict[str, Any]] = None
|
562
|
+
) -> EnhancedError:
|
563
|
+
"""Analyze exception and create enhanced error with resolution guidance."""
|
564
|
+
# Check if this is already an enhanced error
|
565
|
+
if operation_data and "enhanced_error" in operation_data:
|
566
|
+
return operation_data["enhanced_error"]
|
567
|
+
|
568
|
+
# Determine error category and severity
|
569
|
+
category = self._classify_error(exception)
|
570
|
+
severity = self._determine_severity(exception, category)
|
571
|
+
|
572
|
+
# Generate resolution guidance
|
573
|
+
resolution = self._generate_resolution(exception, category, context)
|
574
|
+
|
575
|
+
# Create error code
|
576
|
+
error_code = f"{self.module_name.upper()}_{category.value}_{type(exception).__name__}"
|
577
|
+
|
578
|
+
# Determine retry possibility
|
579
|
+
retry_possible = self._is_retryable_error(exception, category)
|
580
|
+
|
581
|
+
enhanced_error = EnhancedError(
|
582
|
+
original_exception=exception,
|
583
|
+
severity=severity,
|
584
|
+
category=category,
|
585
|
+
context=context,
|
586
|
+
resolution=resolution,
|
587
|
+
error_code=error_code,
|
588
|
+
retry_possible=retry_possible,
|
589
|
+
audit_trail=[
|
590
|
+
{
|
591
|
+
"created": datetime.now().isoformat(),
|
592
|
+
"analysis_completed": True,
|
593
|
+
"operation_data": operation_data or {},
|
594
|
+
}
|
595
|
+
],
|
596
|
+
)
|
597
|
+
|
598
|
+
return enhanced_error
|
599
|
+
|
600
|
+
def _classify_error(self, exception: Exception) -> ErrorCategory:
    """
    Classify exception into appropriate category.

    Checks run in order and the first match wins: credential exceptions,
    AWS ClientError (split by error code into permissions, throttling or
    generic service errors), connection errors, validation errors, messages
    mentioning "timeout" or "performance", then local file/permission errors.

    Args:
        exception: The exception to classify

    Returns:
        The matching ErrorCategory, or ErrorCategory.UNKNOWN when nothing matches
    """
    # Error codes signalling a denied request. Many services report
    # "AccessDeniedException" rather than the bare "AccessDenied".
    permission_codes = frozenset(
        {
            "AccessDenied",
            "AccessDeniedException",
            "AuthorizationError",
            "Forbidden",
            "UnauthorizedOperation",
        }
    )
    # Error codes signalling rate limiting
    throttling_codes = frozenset(
        {
            "Throttling",
            "ThrottlingException",
            "ThrottledException",
            "RequestThrottled",
            "RequestThrottledException",
            "RequestLimitExceeded",
            "TooManyRequestsException",
            "ProvisionedThroughputExceededException",
            "SlowDown",
            "EC2ThrottledException",
        }
    )

    # TokenRetrievalError (for example an expired SSO token) also needs the
    # user to re-authenticate, so it belongs with the credential failures.
    if isinstance(
        exception, (NoCredentialsError, PartialCredentialsError, ProfileNotFound, TokenRetrievalError)
    ):
        return ErrorCategory.AWS_CREDENTIALS

    if isinstance(exception, ClientError):
        error_code = exception.response.get("Error", {}).get("Code", "")
        if error_code in permission_codes:
            return ErrorCategory.AWS_PERMISSIONS
        if error_code in throttling_codes:
            return ErrorCategory.AWS_THROTTLING
        return ErrorCategory.AWS_SERVICE

    if isinstance(exception, (EndpointConnectionError, BotoConnectionError, ConnectionError)):
        return ErrorCategory.NETWORK

    # The message-based checks below are heuristics on the exception text
    message = str(exception).lower()
    if isinstance(exception, (ValueError, TypeError)) and "validation" in message:
        return ErrorCategory.DATA_VALIDATION
    if "timeout" in message or "performance" in message:
        return ErrorCategory.PERFORMANCE
    if isinstance(exception, (FileNotFoundError, PermissionError)):
        return ErrorCategory.CONFIGURATION
    return ErrorCategory.UNKNOWN
|
622
|
+
|
623
|
+
def _determine_severity(self, exception: Exception, category: ErrorCategory) -> ErrorSeverity:
    """
    Map an error category to a severity.

    Credentials, permissions and data-validation errors are HIGH, throttling
    is LOW, and every other category is MEDIUM. The exception itself is not
    inspected.
    """
    high_severity_categories = (
        ErrorCategory.AWS_CREDENTIALS,
        ErrorCategory.AWS_PERMISSIONS,
        ErrorCategory.DATA_VALIDATION,
    )
    if category in high_severity_categories:
        return ErrorSeverity.HIGH

    # Usually temporary
    if category == ErrorCategory.AWS_THROTTLING:
        return ErrorSeverity.LOW

    # NETWORK, PERFORMANCE and all remaining categories
    return ErrorSeverity.MEDIUM
|
639
|
+
|
640
|
+
def _generate_resolution(
    self, exception: Exception, category: ErrorCategory, context: ErrorContext
) -> ErrorResolution:
    """Select resolution guidance for an error based on its category.

    Credential failures are refined by exception type so that the most
    specific guidance available is returned instead of the generic
    credentials message.

    Args:
        exception: The exception that was raised.
        category: Category previously assigned to the exception.
        context: Context describing the failing module and operation.

    Returns:
        The ``ErrorResolution`` produced by the matching generator; the
        generic generator is used for categories without a dedicated one.
    """
    if category == ErrorCategory.AWS_CREDENTIALS:
        # The exception type tells us which credentials guidance applies;
        # only fall back to the generic text when it is none of these.
        if isinstance(exception, ProfileNotFound):
            return self._generate_profile_not_found_resolution(context)
        if isinstance(exception, NoCredentialsError):
            return self._generate_credentials_resolution(context, "no_credentials")
        if isinstance(exception, PartialCredentialsError):
            return self._generate_credentials_resolution(context, "partial_credentials")
        return self._generate_credentials_resolution(context, "generic")
    if category == ErrorCategory.AWS_PERMISSIONS:
        return self._generate_permissions_resolution(exception, context)
    if category == ErrorCategory.NETWORK:
        return self._generate_network_resolution(exception, context)
    if category == ErrorCategory.AWS_THROTTLING:
        return self._generate_throttling_resolution(exception, context)
    return self._generate_generic_resolution(exception, context)
|
654
|
+
|
655
|
+
def _generate_credentials_resolution(self, context: ErrorContext, error_type: str) -> ErrorResolution:
    """Build resolution guidance for an AWS credentials failure.

    Args:
        context: Error context. ``aws_profile`` and ``aws_region`` tailor
            the suggested commands, falling back to ``"default"`` and
            ``"us-east-1"`` respectively when unset.
        error_type: ``"no_credentials"`` or ``"partial_credentials"`` for
            targeted guidance; any other value yields generic guidance.

    Returns:
        An ``ErrorResolution`` with a "2-5 minutes" estimate that does not
        require administrator privileges.
    """
    if error_type == "no_credentials":
        profile = context.aws_profile or "default"
        title = "AWS Credentials Not Found"
        description = "No AWS credentials were found for authentication"
        action_items = [
            "Configure AWS credentials using one of the following methods:",
            "1. Run 'aws configure' to set up default credentials",
            "2. Set AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables",
            "3. Use AWS SSO login: 'aws sso login --profile your-profile'",
            "4. Use IAM roles if running on EC2",
        ]
        recovery_commands = [
            "aws configure",
            "aws sso login --profile " + profile,
            "export AWS_PROFILE=" + profile,
        ]
    elif error_type == "partial_credentials":
        region = context.aws_region or "us-east-1"
        title = "Incomplete AWS Credentials"
        description = "AWS credentials are partially configured"
        action_items = [
            "Ensure all required credential components are provided:",
            "1. Access Key ID",
            "2. Secret Access Key",
            "3. Session Token (if using temporary credentials)",
            "4. Region configuration",
        ]
        recovery_commands = [
            "aws configure list",
            "aws configure set region " + region,
        ]
    else:
        # Unknown error types get generic verification steps.
        title = "AWS Credentials Issue"
        description = "Generic AWS credentials problem detected"
        action_items = [
            "Verify AWS credentials configuration",
            "Check AWS profile settings",
            "Ensure credentials have not expired",
        ]
        recovery_commands = ["aws sts get-caller-identity", "aws configure list-profiles"]

    return ErrorResolution(
        title=title,
        description=description,
        action_items=action_items,
        recovery_commands=recovery_commands,
        estimated_resolution_time="2-5 minutes",
        requires_admin=False,
        documentation_links=["https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-configure.html"],
    )
|
705
|
+
|
706
|
+
def _generate_permissions_resolution(self, exception: Exception, context: ErrorContext) -> ErrorResolution:
    """Build resolution guidance for an AWS permissions failure.

    Args:
        exception: The raised exception. When it is a ``ClientError`` its
            error code is quoted in the first action item; otherwise the
            code is left empty.
        context: Error context. ``operation`` selects the recommended
            profiles from ``self.profile_recommendations``.

    Returns:
        An ``ErrorResolution`` flagged as requiring an administrator. When
        profiles are recommended for the operation they are listed in the
        action items and the first one is used in the recovery commands.
    """
    error_code = (
        exception.response.get("Error", {}).get("Code", "") if isinstance(exception, ClientError) else ""
    )

    # Profiles registered for this operation; empty when none are known.
    suggested = self.profile_recommendations.get(context.operation, [])

    steps = [
        f"AWS permissions error detected: {error_code}",
        "Verify your AWS profile has the required permissions for this operation",
        "Consider switching to a profile with appropriate permissions",
    ]
    commands = ["aws sts get-caller-identity", "aws iam get-user", "aws iam list-attached-user-policies"]

    if suggested:
        steps.append("Recommended profiles for this operation:")
        steps.extend(f" • {profile}" for profile in suggested)

        first_choice = suggested[0]
        commands.append(f"export AWS_PROFILE={first_choice}")
        commands.append(f"runbooks {context.module_name} {context.operation} --profile {first_choice}")

    return ErrorResolution(
        title="AWS Permissions Error",
        description=f"Insufficient permissions for {context.operation} operation",
        action_items=steps,
        recovery_commands=commands,
        estimated_resolution_time="10-15 minutes",
        requires_admin=True,
        documentation_links=["https://docs.aws.amazon.com/IAM/latest/UserGuide/access_policies.html"],
    )
|
745
|
+
|
746
|
+
def _generate_profile_not_found_resolution(self, context: ErrorContext) -> ErrorResolution:
    """Build resolution guidance for a missing AWS profile.

    Args:
        context: Error context. ``aws_profile`` is quoted in the
            description, and used in the recovery commands with
            ``"your-profile"`` as the placeholder when it is unset.

    Returns:
        An ``ErrorResolution`` with a "5-10 minutes" estimate that does not
        require administrator privileges.
    """
    description = f"AWS profile '{context.aws_profile}' was not found in your configuration"
    profile = context.aws_profile or "your-profile"

    steps = [
        "Verify the profile name is spelled correctly",
        "Check available AWS profiles",
        "Configure the profile if it doesn't exist",
        "Use the default profile if appropriate",
    ]
    commands = [
        "aws configure list-profiles",
        f"aws configure --profile {profile}",
        "aws sso login --profile " + profile,
    ]
    alternatives = [
        "Use --profile default if you have default credentials configured",
        "Use environment variables instead of profiles",
    ]

    return ErrorResolution(
        title="AWS Profile Not Found",
        description=description,
        action_items=steps,
        recovery_commands=commands,
        alternative_approaches=alternatives,
        estimated_resolution_time="5-10 minutes",
        requires_admin=False,
    )
|
769
|
+
|
770
|
+
def _generate_network_resolution(self, exception: Exception, context: ErrorContext) -> ErrorResolution:
    """Build resolution guidance for a network connectivity failure.

    Args:
        exception: The raised exception; not inspected by this method.
        context: Error context. ``aws_region`` (default ``"us-east-1"``)
            selects the endpoint used in the DNS check; ``module_name`` and
            ``operation`` are used to build the retry command.

    Returns:
        An ``ErrorResolution`` with a "5-15 minutes" estimate that does not
        require administrator privileges.
    """
    region = context.aws_region or "us-east-1"

    # Suggest retrying in a region other than the one that just failed.
    alternate_region = "us-east-1" if region == "us-west-2" else "us-west-2"

    return ErrorResolution(
        title="Network Connection Error",
        description="Unable to connect to AWS services",
        action_items=[
            "Check your internet connection",
            "Verify AWS service endpoints are accessible",
            "Check firewall and proxy settings",
            "Try a different AWS region if applicable",
        ],
        recovery_commands=[
            "ping aws.amazon.com",
            # AWS endpoints are named service.region.amazonaws.com; a bare
            # region hostname is not a service endpoint, so probe the
            # regional STS endpoint instead.
            f"nslookup sts.{region}.amazonaws.com",
            f"runbooks {context.module_name} {context.operation} --region {alternate_region}",
        ],
        estimated_resolution_time="5-15 minutes",
        requires_admin=False,
    )
|
789
|
+
|
790
|
+
def _generate_throttling_resolution(self, exception: Exception, context: ErrorContext) -> ErrorResolution:
    """Build resolution guidance for an AWS API throttling failure.

    Args:
        exception: The raised exception; not inspected by this method.
        context: Error context. ``module_name`` and ``operation`` are used
            to build the suggested retry commands.

    Returns:
        An ``ErrorResolution`` that does not require administrator
        privileges.
    """
    base_command = f"runbooks {context.module_name} {context.operation}"
    steps = [
        "Reduce request frequency",
        "Implement exponential backoff",
        "Consider using pagination for large datasets",
        "Monitor API usage patterns",
    ]
    commands = [
        base_command + " --batch-size 10",
        base_command + " --delay 2",
    ]
    return ErrorResolution(
        title="AWS API Throttling",
        description="Request rate exceeded AWS API limits",
        action_items=steps,
        recovery_commands=commands,
        estimated_resolution_time="Automatic retry in 30-60 seconds",
        requires_admin=False,
    )
|
808
|
+
|
809
|
+
def _generate_generic_resolution(self, exception: Exception, context: ErrorContext) -> ErrorResolution:
    """Build fallback resolution guidance for an uncategorised error.

    Args:
        exception: The raised exception; its string form becomes the
            resolution description.
        context: Error context. ``operation`` appears in the title, and
            ``module_name`` / ``operation`` in the suggested commands.

    Returns:
        An ``ErrorResolution`` with a "10-20 minutes" estimate that does
        not require administrator privileges.
    """
    heading = f"Error in {context.operation}"
    details = str(exception)
    steps = [
        "Review the error message for specific details",
        "Check operation parameters and configuration",
        "Verify system prerequisites are met",
        "Consider enabling verbose logging for more details",
    ]
    base_command = f"runbooks {context.module_name} {context.operation}"
    commands = [base_command + " --verbose", base_command + " --dry-run"]

    return ErrorResolution(
        title=heading,
        description=details,
        action_items=steps,
        recovery_commands=commands,
        estimated_resolution_time="10-20 minutes",
        requires_admin=False,
    )
|
827
|
+
|
828
|
+
def _is_retryable_error(self, exception: Exception, category: ErrorCategory) -> bool:
    """Report whether an error is worth retrying.

    Args:
        exception: The raised exception; only consulted for the
            ``AWS_SERVICE`` category.
        category: Category previously assigned to the exception.

    Returns:
        ``True`` for throttling and network errors, and for AWS service
        ``ClientError``s whose code is ``InternalError``,
        ``ServiceUnavailable`` or ``RequestTimeout``; ``False`` otherwise.
    """
    if category in (ErrorCategory.AWS_THROTTLING, ErrorCategory.NETWORK):
        return True

    if category == ErrorCategory.AWS_SERVICE and isinstance(exception, ClientError):
        code = exception.response.get("Error", {}).get("Code", "")
        return code in ("InternalError", "ServiceUnavailable", "RequestTimeout")

    return False
|
839
|
+
|
840
|
+
def _display_enhanced_error(self, enhanced_error: EnhancedError):
    """Render an enhanced error to the console.

    Prints, in order: a summary panel, a numbered table of resolution
    steps (when there are any), a panel of recovery commands (when there
    are any), the estimated resolution time, and a warning when the
    resolution is flagged as requiring an administrator.

    Args:
        enhanced_error: The error to display; its ``resolution``,
            ``context``, ``severity``, ``category`` and ``error_code``
            attributes are read.
    """
    resolution = enhanced_error.resolution
    error_context = enhanced_error.context
    severity = enhanced_error.severity

    # (Rich style, icon) for each severity level.
    severity_presentation = {
        ErrorSeverity.CRITICAL: ("red bold reverse", "🚨"),
        ErrorSeverity.HIGH: ("red bold", "🔴"),
        ErrorSeverity.MEDIUM: ("yellow bold", "🟡"),
        ErrorSeverity.LOW: ("yellow", "🟠"),
        ErrorSeverity.INFO: ("blue", "🔵"),
    }
    severity_style, severity_icon = severity_presentation[severity]
    category_label = enhanced_error.category.value.replace("_", " ").title()

    # Main error panel
    error_content = f"""
[bold red]Error:[/] {resolution.title}

[bold yellow]Details:[/] {resolution.description}

[bold cyan]Module:[/] {error_context.module_name}
[bold cyan]Operation:[/] {error_context.operation}
[bold cyan]Severity:[/] {severity_icon} {severity.value}
[bold cyan]Category:[/] {category_label}
"""

    console.print(
        create_panel(
            error_content.strip(),
            title=f"⚠️ {enhanced_error.error_code}",
            # Only the colour (first word of the style) is used for the border.
            border_style=severity_style.split()[0],
        )
    )

    # Resolution guidance
    if resolution.action_items:
        steps_table = create_table(
            title="🛠️ Resolution Steps",
            columns=[
                {"name": "Step", "style": "cyan", "justify": "center"},
                {"name": "Action", "style": "white", "justify": "left"},
            ],
        )
        for step_number, action in enumerate(resolution.action_items, 1):
            steps_table.add_row(str(step_number), action)
        console.print(steps_table)

    # Recovery commands
    if resolution.recovery_commands:
        console.print(
            create_panel(
                "\n".join(resolution.recovery_commands),
                title="💻 Recovery Commands",
                border_style="green",
            )
        )

    # Time estimate
    print_info(f"⏱️ Estimated resolution time: {resolution.estimated_resolution_time}")

    if resolution.requires_admin:
        print_warning("👤 Administrator privileges may be required")
|
904
|
+
|
905
|
+
def _log_error_to_audit_trail(self, enhanced_error: EnhancedError):
    """Append a record of the error to the JSON audit file.

    The whole file at ``self.audit_file_path`` is read, the new entry is
    appended to the loaded list, and the file is rewritten. Any failure
    while reading or writing is reported through ``self.logger.warning``
    and otherwise ignored.

    Args:
        enhanced_error: The error to record.
    """
    error_context = enhanced_error.context
    audit_entry = dict(
        timestamp=datetime.now().isoformat(),
        module=self.module_name,
        error_code=enhanced_error.error_code,
        severity=enhanced_error.severity.value,
        category=enhanced_error.category.value,
        operation=error_context.operation,
        aws_profile=error_context.aws_profile,
        error_message=str(enhanced_error.original_exception),
        resolution_title=enhanced_error.resolution.title,
        retry_possible=enhanced_error.retry_possible,
        audit_trail=enhanced_error.audit_trail,
    )

    # Append to audit file
    try:
        if Path(self.audit_file_path).exists():
            with open(self.audit_file_path, "r") as existing:
                history = json.load(existing)
        else:
            history = []

        history.append(audit_entry)

        with open(self.audit_file_path, "w") as target:
            # default=str stringifies any value json cannot serialise natively.
            json.dump(history, target, indent=2, default=str)
    except Exception as e:
        self.logger.warning(f"Failed to write audit entry: {e}")
|
935
|
+
|
936
|
+
def _attempt_auto_resolution(self, enhanced_error: EnhancedError) -> bool:
    """Try to resolve an error without user intervention.

    Only throttling errors are handled: the method sleeps for
    ``enhanced_error.backoff_seconds`` and reports success. The failed
    operation itself is not re-run here.

    Args:
        enhanced_error: The error to resolve.

    Returns:
        ``True`` after waiting out a throttling error, ``False`` for every
        other category.
    """
    if enhanced_error.category != ErrorCategory.AWS_THROTTLING:
        # For other error types, manual intervention is typically required
        return False

    time.sleep(enhanced_error.backoff_seconds)
    return True
|
945
|
+
|
946
|
+
def _initialize_aws_error_mappings(self) -> Dict[str, Dict[str, Any]]:
    """Build the table of known AWS error codes and their handling hints.

    Returns:
        A new dict keyed by AWS error code. Each value holds the
        ``severity``, ``category`` and ``retry_possible`` flag for that
        code, plus either a ``description`` or retry parameters
        (``max_retries``, ``backoff_seconds``).
    """
    mappings: Dict[str, Dict[str, Any]] = {}

    mappings["AccessDenied"] = {
        "severity": ErrorSeverity.HIGH,
        "category": ErrorCategory.AWS_PERMISSIONS,
        "retry_possible": False,
        "description": "Access denied to AWS resource",
    }
    mappings["Throttling"] = {
        "severity": ErrorSeverity.LOW,
        "category": ErrorCategory.AWS_THROTTLING,
        "retry_possible": True,
        "max_retries": 5,
        "backoff_seconds": 2.0,
    }
    mappings["InvalidParameterValue"] = {
        "severity": ErrorSeverity.MEDIUM,
        "category": ErrorCategory.DATA_VALIDATION,
        "retry_possible": False,
        "description": "Invalid parameter provided to AWS API",
    }

    return mappings
|
969
|
+
|
970
|
+
def _initialize_profile_recommendations(self) -> Dict[str, List[str]]:
|
971
|
+
"""Initialize profile recommendations for different operations."""
|
972
|
+
return {
|
973
|
+
"inventory": ["ams-admin-ReadOnlyAccess-909135376185", "ams-centralised-ops-ReadOnlyAccess-335083429030"],
|
974
|
+
"operate": ["ams-centralised-ops-ReadOnlyAccess-335083429030", "ams-admin-ReadOnlyAccess-909135376185"],
|
975
|
+
"finops": ["ams-admin-Billing-ReadOnlyAccess-909135376185", "ams-admin-ReadOnlyAccess-909135376185"],
|
976
|
+
"security": ["ams-admin-ReadOnlyAccess-909135376185"],
|
977
|
+
"cfat": ["ams-admin-ReadOnlyAccess-909135376185"],
|
978
|
+
}
|
979
|
+
|
980
|
+
def _analyze_aws_error(self, error: ClientError, error_code: str, service: str, operation: str) -> Dict[str, Any]:
    """Look up handling hints for an AWS error code.

    Args:
        error: The originating ``ClientError``; not inspected here.
        error_code: AWS error code used to look up
            ``self.aws_error_mappings``.
        service: AWS service identifier (``"ce"`` is special-cased).
        operation: Name of the failing operation; not inspected here.

    Returns:
        A new dict with the mapped ``severity``, ``category`` and
        ``retry_possible`` values (medium-severity, non-retryable service
        error when the code is unknown). For a Cost Explorer
        ``AccessDenied`` it also carries ``recommended_profiles``.
    """
    default_analysis = {
        "severity": ErrorSeverity.MEDIUM,
        "category": ErrorCategory.AWS_SERVICE,
        "retry_possible": False,
    }
    # Copy the entry: annotating the shared mapping in place would leak
    # service-specific adjustments into every later lookup of this code.
    analysis = dict(self.aws_error_mappings.get(error_code, default_analysis))

    # Service-specific adjustments
    if service == "ce" and error_code == "AccessDenied":
        # Cost Explorer requires special billing permissions
        analysis["recommended_profiles"] = ["ams-admin-Billing-ReadOnlyAccess-909135376185"]

    return analysis
|
993
|
+
|
994
|
+
def _generate_aws_resolution(
    self, error: ClientError, error_code: str, service: str, context: ErrorContext
) -> ErrorResolution:
    """Build resolution guidance for an AWS service error.

    Permission and throttling codes are delegated to their dedicated
    generators. The recognised codes match the ones used when
    categorising exceptions, so an error gets the same kind of guidance
    on either path.

    Args:
        error: The originating ``ClientError``.
        error_code: AWS error code extracted from the error.
        service: AWS service identifier, e.g. ``"ec2"`` or ``"ce"``.
        context: Context describing the failing module and operation.

    Returns:
        Permission or throttling guidance for the matching codes;
        otherwise a generic service-error ``ErrorResolution``.
    """
    if error_code in ("AccessDenied", "Forbidden", "UnauthorizedOperation"):
        return self._generate_permissions_resolution(error, context)
    if error_code in ("Throttling", "ThrottlingException", "RequestLimitExceeded"):
        return self._generate_throttling_resolution(error, context)

    # Human-readable service name; unknown services are shown upper-cased.
    service_friendly = {
        "ce": "Cost Explorer",
        "ec2": "EC2",
        "s3": "S3",
        "iam": "IAM",
        "organizations": "Organizations",
    }.get(service, service.upper())

    return ErrorResolution(
        title=f"{service_friendly} Service Error",
        description=f"AWS {service_friendly} service error: {error_code}",
        action_items=[
            f"Review {service_friendly} service documentation",
            "Check API parameters and request format",
            "Verify service availability in your region",
            "Consider alternative approaches if available",
        ],
        recovery_commands=[
            f"aws {service} help",
            f"runbooks {context.module_name} {context.operation} --dry-run",
        ],
        estimated_resolution_time="10-20 minutes",
        requires_admin=False,
    )
|
1027
|
+
|
1028
|
+
# Additional helper methods for error analysis
|
1029
|
+
def _calculate_error_trends(self) -> Dict[str, Any]:
|
1030
|
+
"""Calculate error trends for reporting."""
|
1031
|
+
return {"trend_analysis": "Stable", "peak_error_times": [], "common_patterns": []}
|
1032
|
+
|
1033
|
+
def _analyze_error_patterns(self) -> List[Dict[str, Any]]:
|
1034
|
+
"""Analyze common error patterns."""
|
1035
|
+
return []
|
1036
|
+
|
1037
|
+
def _calculate_performance_impact(self) -> Dict[str, Any]:
|
1038
|
+
"""Calculate performance impact of errors."""
|
1039
|
+
return {
|
1040
|
+
"average_resolution_time": "5-15 minutes",
|
1041
|
+
"operations_affected": 0,
|
1042
|
+
"performance_degradation": "Minimal",
|
1043
|
+
}
|
1044
|
+
|
1045
|
+
def _generate_recommendations(self) -> List[str]:
    """Suggest improvements based on which error categories have occurred.

    Returns:
        One recommendation for each of the credentials, throttling and
        network categories whose count in ``self.error_counts`` is above
        zero, in that order.
    """
    advice_by_category = (
        (ErrorCategory.AWS_CREDENTIALS, "Consider implementing AWS SSO for improved credential management"),
        (ErrorCategory.AWS_THROTTLING, "Implement exponential backoff for API calls"),
        (ErrorCategory.NETWORK, "Review network connectivity and proxy settings"),
    )
    return [advice for category, advice in advice_by_category if self.error_counts[category] > 0]
|
1059
|
+
|
1060
|
+
def _summarize_audit_trail(self, hours: int) -> Dict[str, Any]:
|
1061
|
+
"""Summarize audit trail for given time period."""
|
1062
|
+
return {"entries": 0, "resolution_success_rate": 0.0, "average_resolution_time": "5-15 minutes"}
|
1063
|
+
|
1064
|
+
def _execute_automated_recovery(self, enhanced_error: EnhancedError) -> bool:
    """Run automated recovery for an error.

    This method only announces the attempt; no recovery procedure is
    carried out.

    Args:
        enhanced_error: The error to recover from; not inspected here.

    Returns:
        Always ``False``.
    """
    # Implementation would depend on specific recovery procedures
    recovered = False
    print_info("🔄 Executing automated recovery...")
    return recovered
|
1069
|
+
|
1070
|
+
|
1071
|
+
# Factory function for easy integration
|
1072
|
+
def create_exception_handler(
    module_name: str, enable_rich_output: bool = True, enable_auto_retry: bool = True
) -> EnterpriseExceptionHandler:
    """Create an ``EnterpriseExceptionHandler`` for a module.

    Args:
        module_name: Name of the module the handler reports errors for.
        enable_rich_output: Forwarded to the handler unchanged.
        enable_auto_retry: Forwarded to the handler unchanged.

    Returns:
        The newly constructed handler.
    """
    handler_options = {
        "module_name": module_name,
        "enable_rich_output": enable_rich_output,
        "enable_auto_retry": enable_auto_retry,
    }
    return EnterpriseExceptionHandler(**handler_options)
|
1079
|
+
|
1080
|
+
|
1081
|
+
# Context manager for enhanced exception handling
|
1082
|
+
class enhanced_error_handling:
    """Context manager that routes exceptions through an enterprise handler.

    Any ``Exception`` raised inside the ``with`` block is passed to
    ``handler.handle_exception``. If the resulting error is marked as
    retryable and the handler's non-interactive recovery workflow reports
    success, the exception is suppressed; otherwise it propagates.

    Interpreter-level exits that are not ``Exception`` subclasses
    (``KeyboardInterrupt``, ``SystemExit``, ``GeneratorExit``) bypass the
    handler and always propagate.
    """

    def __init__(
        self,
        handler: EnterpriseExceptionHandler,
        context: ErrorContext,
        operation_data: Optional[Dict[str, Any]] = None,
    ):
        """Store the handler and the context to report errors with.

        Args:
            handler: Handler that processes exceptions raised in the block.
            context: Error context passed to the handler.
            operation_data: Optional extra data passed to the handler.
        """
        self.handler = handler
        self.context = context
        self.operation_data = operation_data

    def __enter__(self):
        """Enter the block and return this context manager."""
        return self

    def __exit__(self, exc_type, exc_value, traceback_obj):
        """Handle an exception raised in the block, if any.

        Returns:
            ``True`` to suppress the exception when recovery succeeded,
            ``False`` to let it propagate (or when nothing was raised).
        """
        # Ctrl-C, sys.exit() and generator shutdown are not errors to
        # diagnose: let them through without display or audit logging.
        if exc_value is None or not isinstance(exc_value, Exception):
            return False

        enhanced_error = self.handler.handle_exception(exc_value, self.context, self.operation_data)

        # Create recovery workflow if error is recoverable
        if enhanced_error.retry_possible:
            recovery_success = self.handler.create_error_recovery_workflow(enhanced_error, interactive=False)
            if recovery_success:
                return True  # Suppress the exception

        return False  # Let the exception propagate
|