runbooks 1.0.1__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34):
  1. runbooks/cloudops/models.py +20 -14
  2. runbooks/common/aws_pricing_api.py +276 -44
  3. runbooks/common/dry_run_examples.py +587 -0
  4. runbooks/common/dry_run_framework.py +520 -0
  5. runbooks/common/memory_optimization.py +533 -0
  6. runbooks/common/performance_optimization_engine.py +1153 -0
  7. runbooks/common/profile_utils.py +10 -3
  8. runbooks/common/sre_performance_suite.py +574 -0
  9. runbooks/finops/business_case_config.py +314 -0
  10. runbooks/finops/cost_processor.py +19 -4
  11. runbooks/finops/ebs_cost_optimizer.py +1 -1
  12. runbooks/finops/embedded_mcp_validator.py +642 -36
  13. runbooks/finops/executive_export.py +789 -0
  14. runbooks/finops/finops_scenarios.py +34 -27
  15. runbooks/finops/notebook_utils.py +1 -1
  16. runbooks/finops/schemas.py +73 -58
  17. runbooks/finops/single_dashboard.py +20 -4
  18. runbooks/finops/vpc_cleanup_exporter.py +2 -1
  19. runbooks/inventory/models/account.py +5 -3
  20. runbooks/inventory/models/inventory.py +1 -1
  21. runbooks/inventory/models/resource.py +5 -3
  22. runbooks/inventory/organizations_discovery.py +89 -5
  23. runbooks/main.py +182 -61
  24. runbooks/operate/vpc_operations.py +60 -31
  25. runbooks/remediation/workspaces_list.py +2 -2
  26. runbooks/vpc/config.py +17 -8
  27. runbooks/vpc/heatmap_engine.py +425 -53
  28. runbooks/vpc/performance_optimized_analyzer.py +546 -0
  29. {runbooks-1.0.1.dist-info → runbooks-1.0.2.dist-info}/METADATA +1 -1
  30. {runbooks-1.0.1.dist-info → runbooks-1.0.2.dist-info}/RECORD +34 -26
  31. {runbooks-1.0.1.dist-info → runbooks-1.0.2.dist-info}/WHEEL +0 -0
  32. {runbooks-1.0.1.dist-info → runbooks-1.0.2.dist-info}/entry_points.txt +0 -0
  33. {runbooks-1.0.1.dist-info → runbooks-1.0.2.dist-info}/licenses/LICENSE +0 -0
  34. {runbooks-1.0.1.dist-info → runbooks-1.0.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,520 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Universal --dry-run Safety Framework for CloudOps Runbooks
4
+
5
+ This module provides a comprehensive, enterprise-grade dry-run framework that ensures
6
+ safety-first operations across all runbooks modules. It implements consistent behavior,
7
+ logging, and safety controls for all operation types.
8
+
9
+ Strategic Alignment:
10
+ - "Move Fast, But Not So Fast We Crash" - Safety-first with explicit confirmation
11
+ - Enterprise safety controls with comprehensive audit trails
12
+ - Consistent UX across all 7 core modules
13
+
14
+ Author: CloudOps Runbooks Team
15
+ Version: 1.0.0 - Enterprise Safety Framework
16
+ """
17
+
18
+ import functools
19
+ import logging
20
+ from datetime import datetime
21
+ from enum import Enum
22
+ from typing import Any, Callable, Dict, List, Optional, Union
23
+ from dataclasses import dataclass
24
+
25
+ from rich.console import Console
26
+ from rich.panel import Panel
27
+ from rich.table import Table
28
+ from rich.text import Text
29
+
30
+ from runbooks.common.rich_utils import (
31
+ console,
32
+ print_header,
33
+ print_success,
34
+ print_warning,
35
+ print_error,
36
+ STATUS_INDICATORS
37
+ )
38
+
39
+
40
class OperationType(Enum):
    """Categories of runbook operations, used to pick the matching dry-run behavior."""

    # --- Read-only operations: nothing is changed, inherently safe ---
    DISCOVERY = "discovery"  # inventory collect, scan
    ANALYSIS = "analysis"  # finops dashboard, security assess, vpc analyze
    ASSESSMENT = "assessment"  # cfat assess
    REPORTING = "reporting"  # generate reports, export data

    # --- State-changing operations: safety controls required ---
    RESOURCE_CREATE = "create"  # EC2 instances, S3 buckets, VPCs
    RESOURCE_MODIFY = "modify"  # update configurations, scaling
    RESOURCE_DELETE = "delete"  # terminate, delete resources
    CONFIGURATION = "config"  # change settings, policies
    REMEDIATION = "remediation"  # security fixes, compliance actions

    # --- High-risk operations: explicit confirmation required ---
    BULK_OPERATIONS = "bulk"  # multi-resource operations
    CROSS_ACCOUNT = "cross_account"  # operations affecting multiple accounts
    FINANCIAL = "financial"  # budget modifications, billing changes
60
+
61
+
62
@dataclass
class DryRunContext:
    """Context information for a single dry-run-aware operation.

    Instances are created by ``DryRunSafetyFramework.create_context`` and carry
    the resolved dry-run state together with an in-memory audit trail.
    """

    enabled: bool  # True when the operation runs in dry-run mode
    operation_type: OperationType
    module_name: str
    operation_name: str
    target_resources: List[str]
    estimated_impact: Optional[str] = None
    safety_level: str = "standard"  # standard, high, critical
    requires_confirmation: bool = False
    # None is only a placeholder default: __post_init__ swaps it for a fresh
    # list so instances never share one mutable audit trail.
    audit_trail: Optional[List[Dict[str, Any]]] = None

    def __post_init__(self):
        """Replace a missing (or explicitly ``None``) audit trail with an empty list."""
        if self.audit_trail is None:
            self.audit_trail = []
79
+
80
+
81
class DryRunSafetyFramework:
    """
    Universal dry-run safety framework for enterprise operations.

    Provides consistent dry-run behavior, safety controls, and audit trails
    across all runbooks modules. Per-operation-type policy lives in
    ``self.safety_configs``; every event is appended to the context's
    ``audit_trail`` and mirrored to the module logger.
    """

    def __init__(self, console: Optional[Console] = None):
        """
        Args:
            console: Rich console used for banners and tables. The parameter
                shadows the module-level ``console`` imported from rich_utils,
                so when omitted a fresh ``Console()`` is created.
        """
        self.console = console or Console()
        self.logger = logging.getLogger(__name__)

        # Safety configuration, keyed by operation type.
        # NOTE(review): REPORTING has no entry. create_context() falls back to
        # the RESOURCE_MODIFY policy for it, while the live banner shows the
        # read-only panel. Confirm whether REPORTING should get a read-only entry.
        self.safety_configs = {
            # Discovery is inherently safe; API calls can be simulated.
            OperationType.DISCOVERY: self._read_only_config(simulation_mode=True),
            # Analysis is read-only and uses real API calls.
            OperationType.ANALYSIS: self._read_only_config(simulation_mode=False),
            # Assessment is read-only.
            OperationType.ASSESSMENT: self._read_only_config(simulation_mode=False),
            # Everything below is safety-first: dry-run by default, confirmation
            # required before a live run.
            OperationType.RESOURCE_CREATE: self._guarded_config(
                "⚠️ RESOURCE CREATION: This will create new AWS resources and incur costs"
            ),
            OperationType.RESOURCE_MODIFY: self._guarded_config(
                "⚠️ RESOURCE MODIFICATION: This will modify existing AWS resources"
            ),
            OperationType.RESOURCE_DELETE: self._guarded_config(
                "🚨 RESOURCE DELETION: This will permanently delete AWS resources"
            ),
            # Same policy the RESOURCE_MODIFY fallback used to give CONFIGURATION,
            # but with its own warning so the live banner no longer presents a
            # state-changing operation as read-only analysis.
            OperationType.CONFIGURATION: self._guarded_config(
                "⚙️ CONFIGURATION CHANGE: This will change settings or policies"
            ),
            OperationType.REMEDIATION: self._guarded_config(
                "🔧 SECURITY REMEDIATION: This will apply security fixes to resources"
            ),
            OperationType.BULK_OPERATIONS: self._guarded_config(
                "🔥 BULK OPERATION: This will affect multiple resources simultaneously"
            ),
            OperationType.CROSS_ACCOUNT: self._guarded_config(
                "🌐 CROSS-ACCOUNT OPERATION: This will affect multiple AWS accounts"
            ),
            OperationType.FINANCIAL: self._guarded_config(
                "💰 FINANCIAL OPERATION: This will modify budgets or billing configurations"
            ),
        }

    @staticmethod
    def _read_only_config(simulation_mode: bool) -> Dict[str, Any]:
        """Build the policy for a read-only operation type (live by default, no confirmation)."""
        return {
            "default_dry_run": False,
            "requires_confirmation": False,
            "simulation_mode": simulation_mode,
            "warning_message": None,
        }

    @staticmethod
    def _guarded_config(warning_message: str) -> Dict[str, Any]:
        """Build the policy for a state-changing operation type (dry-run by default, confirmation required)."""
        return {
            "default_dry_run": True,
            "requires_confirmation": True,
            "simulation_mode": True,
            "warning_message": warning_message,
        }

    @staticmethod
    def _is_read_only(operation_type: OperationType) -> bool:
        """Return True for the operation types the banner and hints treat as read-only."""
        return operation_type in (
            OperationType.DISCOVERY,
            OperationType.ANALYSIS,
            OperationType.ASSESSMENT,
        )

    def create_context(
        self,
        dry_run: Optional[bool],
        operation_type: OperationType,
        module_name: str,
        operation_name: str,
        target_resources: Optional[List[str]] = None,
        estimated_impact: Optional[str] = None,
    ) -> DryRunContext:
        """
        Create a dry-run context for an operation.

        Args:
            dry_run: User-specified dry-run flag; ``None`` selects the default
                configured for ``operation_type``
            operation_type: Type of operation being performed
            module_name: Name of the module (finops, security, etc.)
            operation_name: Specific operation name
            target_resources: List of resources that will be affected
            estimated_impact: Human-readable impact description

        Returns:
            DryRunContext with appropriate safety settings
        """
        # Types without their own entry get the RESOURCE_MODIFY policy.
        config = self.safety_configs.get(
            operation_type, self.safety_configs[OperationType.RESOURCE_MODIFY]
        )

        # An explicit flag always wins; None means "use the type's default".
        actual_dry_run = config["default_dry_run"] if dry_run is None else dry_run

        if operation_type in (OperationType.CROSS_ACCOUNT, OperationType.FINANCIAL):
            safety_level = "critical"
        elif operation_type in (OperationType.RESOURCE_DELETE, OperationType.BULK_OPERATIONS):
            safety_level = "high"
        else:
            safety_level = "standard"

        context = DryRunContext(
            enabled=actual_dry_run,
            operation_type=operation_type,
            module_name=module_name,
            operation_name=operation_name,
            target_resources=target_resources or [],
            estimated_impact=estimated_impact,
            safety_level=safety_level,
            # Confirmation is only needed when changes would really be applied.
            requires_confirmation=config["requires_confirmation"] and not actual_dry_run,
        )

        self._add_audit_entry(context, "context_created", {
            "dry_run_enabled": actual_dry_run,
            "safety_level": safety_level,
            "requires_confirmation": context.requires_confirmation,
        })

        return context

    def display_dry_run_banner(self, context: DryRunContext) -> None:
        """
        Display appropriate dry-run banner based on operation type.

        Args:
            context: Dry-run context with operation details
        """
        operation_line = f"[dim]Operation: {context.module_name} {context.operation_name}[/dim]"

        if context.enabled:
            # Dry-run mode banner
            title = f"{STATUS_INDICATORS['info']} DRY-RUN MODE ENABLED"
            border_style = "cyan"

            if self._is_read_only(context.operation_type):
                message = "[cyan]Simulation mode: No AWS API calls will be made[/cyan]\n" + operation_line
            else:
                message = "[yellow]Preview mode: No resources will be modified[/yellow]\n" + operation_line + "\n"
                if context.target_resources:
                    message += f"[dim]Target resources: {len(context.target_resources)} items[/dim]"
        else:
            config = self.safety_configs.get(context.operation_type)
            warning = config.get("warning_message") if config else None

            if warning:
                # Live mode banner with warnings
                title = f"{STATUS_INDICATORS['warning']} LIVE MODE - CHANGES WILL BE APPLIED"
                border_style = "red"
                message = f"[red]{warning}[/red]\n" + operation_line
                if context.estimated_impact:
                    message += f"\n[yellow]Estimated impact: {context.estimated_impact}[/yellow]"
            else:
                # Standard live mode for read-only operations
                title = f"{STATUS_INDICATORS['success']} LIVE MODE - REAL DATA ANALYSIS"
                border_style = "green"
                message = "[green]Real AWS API calls will be made for analysis[/green]\n" + operation_line

        self.console.print(Panel(message, title=title, border_style=border_style, title_align="left"))
        self.console.print()  # Add spacing

    def confirm_operation(self, context: DryRunContext) -> bool:
        """
        Request confirmation for operations that require it.

        Args:
            context: Dry-run context

        Returns:
            True if user confirms (or no confirmation is required), False otherwise
        """
        if not context.requires_confirmation:
            return True

        # Show operation details
        table = Table(title="Operation Confirmation Required")
        table.add_column("Property", style="cyan")
        table.add_column("Value", style="white")

        table.add_row("Module", context.module_name)
        table.add_row("Operation", context.operation_name)
        table.add_row("Safety Level", context.safety_level.upper())

        if context.target_resources:
            table.add_row("Resources Affected", str(len(context.target_resources)))

        if context.estimated_impact:
            table.add_row("Estimated Impact", context.estimated_impact)

        self.console.print(table)
        self.console.print()

        prompt = f"Are you sure you want to proceed with this {context.operation_type.value} operation?"

        try:
            import click
        except ImportError:
            click = None

        if click is not None:
            confirmed = click.confirm(prompt, default=False)
        else:
            # Fallback for environments without click. A closed stdin (EOF)
            # is treated as "no" so a non-interactive run fails safe.
            try:
                response = input(prompt + " [y/N]: ")
            except EOFError:
                response = ""
            confirmed = response.strip().lower().startswith("y")

        self._add_audit_entry(context, "confirmation_requested", {
            "user_confirmed": confirmed,
            "safety_level": context.safety_level,
        })

        if not confirmed:
            print_warning("Operation cancelled by user")

        return confirmed

    def log_operation_start(self, context: DryRunContext, details: Optional[Dict[str, Any]] = None) -> None:
        """Log the start of an operation with full context.

        Args:
            context: Dry-run context of the operation
            details: Extra key/value pairs merged into the audit entry
        """
        mode = "DRY-RUN" if context.enabled else "LIVE"

        log_entry = {
            "mode": mode,
            "operation_type": context.operation_type.value,
            "module": context.module_name,
            "operation": context.operation_name,
            "target_count": len(context.target_resources),
            "safety_level": context.safety_level,
        }

        if details:
            log_entry.update(details)

        self._add_audit_entry(context, "operation_started", log_entry)

        # Console output
        status = STATUS_INDICATORS.get("running", "🔄")
        self.console.print(f"{status} Starting {mode} operation: {context.operation_name}")

    def log_operation_complete(
        self,
        context: DryRunContext,
        success: bool = True,
        results: Optional[Dict[str, Any]] = None,
        error: Optional[str] = None,
    ) -> None:
        """Log the completion of an operation.

        Args:
            context: Dry-run context of the operation
            success: Whether the operation succeeded
            results: Optional result payload stored in the audit entry
            error: Optional error description stored in the audit entry
        """
        mode = "DRY-RUN" if context.enabled else "LIVE"

        log_entry = {
            "mode": mode,
            "success": success,
            "duration": self._calculate_duration(context),
        }

        if results:
            log_entry["results"] = results

        if error:
            log_entry["error"] = error

        self._add_audit_entry(context, "operation_completed", log_entry)

        # Console output
        if not success:
            print_error(f"Operation failed in {mode} mode: {error}")
            return

        print_success(f"Operation completed successfully in {mode} mode")

        # After a successful preview of a non-read-only operation, tell the
        # user how to apply the changes for real.
        if context.enabled and not self._is_read_only(context.operation_type):
            self.console.print("[dim]💡 To execute changes, run the same command with --no-dry-run[/dim]")

    def _add_audit_entry(self, context: DryRunContext, event: str, data: Dict[str, Any]) -> None:
        """Append an event to the context's audit trail and mirror it to the logger.

        Args:
            context: Context whose ``audit_trail`` receives the entry
            event: Event name (e.g. ``"operation_started"``)
            data: Event payload, stored as-is under ``"data"``
        """
        context.audit_trail.append({
            "timestamp": datetime.utcnow().isoformat(),
            "event": event,
            "data": data,
        })

        # `extra` keys become LogRecord attributes, and logging raises KeyError
        # when one collides with a built-in attribute such as "module". Use a
        # non-reserved name for the module and nest the free-form payload under
        # one key, so no payload key can collide either.
        self.logger.info("DryRun %s", event, extra={
            "runbook_module": context.module_name,
            "operation": context.operation_name,
            "dry_run": context.enabled,
            "audit_data": data,
        })

    def _calculate_duration(self, context: DryRunContext) -> Optional[str]:
        """Return the time elapsed since the first ``operation_started`` audit entry.

        Returns:
            Duration formatted as ``"<seconds>s"`` with two decimals, or None
            when the audit trail contains no ``operation_started`` entry.
        """
        finished_at = datetime.utcnow()

        started_stamp = next(
            (entry["timestamp"] for entry in context.audit_trail if entry["event"] == "operation_started"),
            None,
        )
        if not started_stamp:
            return None

        elapsed = finished_at - datetime.fromisoformat(started_stamp)
        return f"{elapsed.total_seconds():.2f}s"
418
+
419
+
420
def dry_run_operation(
    operation_type: OperationType,
    requires_confirmation: Optional[bool] = None,
    estimated_impact: Optional[str] = None,
):
    """
    Decorator for operations that support dry-run mode.

    The wrapper pops the ``dry_run`` keyword argument (it is NOT forwarded),
    builds a ``DryRunContext``, shows the banner, asks for confirmation when
    required, and then calls the decorated function with the context as its
    first positional argument. A ``dry_run`` value passed positionally is not
    recognised.

    Args:
        operation_type: Type of operation for appropriate safety controls
        requires_confirmation: Override default confirmation requirement
            (confirmation is still skipped while dry-run is enabled)
        estimated_impact: Description of operation impact

    Returns:
        A decorator. The wrapped callable returns the function's result, or
        None when the user declines the confirmation prompt. Exceptions from
        the function are logged as a failed operation and re-raised.

    Usage:
        @dry_run_operation(OperationType.RESOURCE_DELETE, estimated_impact="Delete 5 VPCs")
        def delete_vpcs(context, vpc_ids):
            # `context` is the DryRunContext; check context.enabled before
            # making changes.
            pass

        delete_vpcs(["vpc-1"], dry_run=False)
    """
    def decorator(func: Callable) -> Callable:
        # Module name is the second-to-last component of the dotted module
        # path (e.g. "runbooks.finops.cost" -> "finops"). `__module__` may be
        # missing or None, so normalise it to a string before splitting.
        module_parts = (getattr(func, "__module__", None) or "").split(".")
        module_name = module_parts[-2] if len(module_parts) > 1 else "unknown"
        operation_name = getattr(func, "__name__", "unknown")

        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            # The flag is consumed here; None selects the type's default.
            dry_run = kwargs.pop("dry_run", None)

            # A new framework instance is created for every call.
            framework = DryRunSafetyFramework()

            context = framework.create_context(
                dry_run=dry_run,
                operation_type=operation_type,
                module_name=module_name,
                operation_name=operation_name,
                estimated_impact=estimated_impact,
            )

            # Override confirmation requirement if specified
            if requires_confirmation is not None:
                context.requires_confirmation = requires_confirmation and not context.enabled

            framework.display_dry_run_banner(context)

            # Declined confirmation aborts without calling the function.
            if not framework.confirm_operation(context):
                return None

            framework.log_operation_start(context)

            try:
                # Call the original function with context as first argument
                result = func(context, *args, **kwargs)
            except Exception as exc:
                framework.log_operation_complete(context, success=False, error=str(exc))
                raise

            framework.log_operation_complete(context, success=True, results={"completed": True})
            return result

        return wrapper
    return decorator
491
+
492
+
493
+ # Convenience functions for common operation types
494
def discovery_operation(func: Callable) -> Callable:
    """Wrap ``func`` as a DISCOVERY operation (inventory, scan)."""
    decorate = dry_run_operation(OperationType.DISCOVERY)
    return decorate(func)
497
+
498
def analysis_operation(func: Callable) -> Callable:
    """Wrap ``func`` as an ANALYSIS operation (finops, security assess, vpc analyze)."""
    decorate = dry_run_operation(OperationType.ANALYSIS)
    return decorate(func)
501
+
502
def assessment_operation(func: Callable) -> Callable:
    """Wrap ``func`` as an ASSESSMENT operation (cfat assess)."""
    decorate = dry_run_operation(OperationType.ASSESSMENT)
    return decorate(func)
505
+
506
def resource_creation_operation(estimated_impact: Optional[str] = None):
    """Decorator factory for resource creation operations.

    Args:
        estimated_impact: Optional human-readable impact description passed
            through to ``dry_run_operation``.

    Returns:
        The decorator produced by ``dry_run_operation`` for
        ``OperationType.RESOURCE_CREATE``.
    """
    return dry_run_operation(OperationType.RESOURCE_CREATE, estimated_impact=estimated_impact)
509
+
510
def resource_deletion_operation(estimated_impact: Optional[str] = None):
    """Decorator factory for resource deletion operations.

    Args:
        estimated_impact: Optional human-readable impact description passed
            through to ``dry_run_operation``.

    Returns:
        The decorator produced by ``dry_run_operation`` for
        ``OperationType.RESOURCE_DELETE``.
    """
    return dry_run_operation(OperationType.RESOURCE_DELETE, estimated_impact=estimated_impact)
513
+
514
def remediation_operation(estimated_impact: Optional[str] = None):
    """Decorator factory for security remediation operations.

    Args:
        estimated_impact: Optional human-readable impact description passed
            through to ``dry_run_operation``.

    Returns:
        The decorator produced by ``dry_run_operation`` for
        ``OperationType.REMEDIATION``.
    """
    return dry_run_operation(OperationType.REMEDIATION, estimated_impact=estimated_impact)
517
+
518
+
519
+ # Global framework instance for direct use
520
+ framework = DryRunSafetyFramework()