runbooks 1.0.1__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. runbooks/cloudops/models.py +20 -14
  2. runbooks/common/aws_pricing_api.py +276 -44
  3. runbooks/common/dry_run_examples.py +587 -0
  4. runbooks/common/dry_run_framework.py +520 -0
  5. runbooks/common/memory_optimization.py +533 -0
  6. runbooks/common/performance_optimization_engine.py +1153 -0
  7. runbooks/common/profile_utils.py +10 -3
  8. runbooks/common/sre_performance_suite.py +574 -0
  9. runbooks/finops/business_case_config.py +314 -0
  10. runbooks/finops/cost_processor.py +19 -4
  11. runbooks/finops/ebs_cost_optimizer.py +1 -1
  12. runbooks/finops/embedded_mcp_validator.py +642 -36
  13. runbooks/finops/executive_export.py +789 -0
  14. runbooks/finops/finops_scenarios.py +34 -27
  15. runbooks/finops/notebook_utils.py +1 -1
  16. runbooks/finops/schemas.py +73 -58
  17. runbooks/finops/single_dashboard.py +20 -4
  18. runbooks/finops/vpc_cleanup_exporter.py +2 -1
  19. runbooks/inventory/models/account.py +5 -3
  20. runbooks/inventory/models/inventory.py +1 -1
  21. runbooks/inventory/models/resource.py +5 -3
  22. runbooks/inventory/organizations_discovery.py +89 -5
  23. runbooks/main.py +182 -61
  24. runbooks/operate/vpc_operations.py +60 -31
  25. runbooks/remediation/workspaces_list.py +2 -2
  26. runbooks/vpc/config.py +17 -8
  27. runbooks/vpc/heatmap_engine.py +425 -53
  28. runbooks/vpc/performance_optimized_analyzer.py +546 -0
  29. {runbooks-1.0.1.dist-info → runbooks-1.0.2.dist-info}/METADATA +1 -1
  30. {runbooks-1.0.1.dist-info → runbooks-1.0.2.dist-info}/RECORD +34 -26
  31. {runbooks-1.0.1.dist-info → runbooks-1.0.2.dist-info}/WHEEL +0 -0
  32. {runbooks-1.0.1.dist-info → runbooks-1.0.2.dist-info}/entry_points.txt +0 -0
  33. {runbooks-1.0.1.dist-info → runbooks-1.0.2.dist-info}/licenses/LICENSE +0 -0
  34. {runbooks-1.0.1.dist-info → runbooks-1.0.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,533 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Memory Optimization Framework for CloudOps-Runbooks
4
+
5
+ 🎯 SRE Automation Specialist Implementation
6
+ Following proven systematic delegation patterns for memory management and optimization.
7
+
8
+ Addresses: Memory Usage Optimization for Large-Scale Operations
9
+ Features:
10
+ - Real-time memory monitoring and alerting
11
+ - Automatic garbage collection optimization
12
+ - Memory-efficient data processing patterns
13
+ - Large dataset streaming and pagination
14
+ - Memory leak detection and prevention
15
+ - Resource cleanup automation
16
+ """
17
+
18
+ import gc
19
+ import logging
20
+ import threading
21
+ import time
22
+ import weakref
23
+ from contextlib import contextmanager
24
+ from dataclasses import dataclass, field
25
+ from datetime import datetime, timezone
26
+ from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple, Union
27
+ import sys
28
+ import tracemalloc
29
+
30
+ try:
31
+ import psutil
32
+ PSUTIL_AVAILABLE = True
33
+ except ImportError:
34
+ PSUTIL_AVAILABLE = False
35
+
36
+ from runbooks.common.rich_utils import (
37
+ console,
38
+ print_header,
39
+ print_success,
40
+ print_warning,
41
+ print_error,
42
+ create_table,
43
+ STATUS_INDICATORS
44
+ )
45
+
46
+ logger = logging.getLogger(__name__)
47
+
48
+
49
@dataclass
class MemorySnapshot:
    """Point-in-time record of process memory usage for monitoring."""

    # When the snapshot was taken (timezone-aware UTC).
    timestamp: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
    # Resident memory in megabytes at snapshot time (0.0 when unavailable).
    memory_mb: float = 0.0
    # Peak memory observed in megabytes (0.0 when unavailable).
    peak_memory_mb: float = 0.0
    # Process memory usage percentage as reported by psutil (0.0 when unavailable).
    memory_percent: float = 0.0
    # Snapshot of gc.get_count(): per-generation collector counters (gen0, gen1, gen2).
    gc_collections: Tuple[int, int, int] = (0, 0, 0)
    # Number of objects currently tracked by the garbage collector.
    active_objects: int = 0
    # Free-form label for the operation that triggered this snapshot.
    operation_context: Optional[str] = None
59
+
60
+
61
@dataclass
class MemoryOptimizationMetrics:
    """Outcome metrics collected for one memory-optimized operation."""

    # Human-readable name of the operation these metrics describe (required).
    operation_name: str
    # Resident memory (MB) when the operation began.
    start_memory_mb: float = 0.0
    # Highest memory (MB) observed during the operation.
    peak_memory_mb: float = 0.0
    # Resident memory (MB) when the operation finished.
    end_memory_mb: float = 0.0
    # Net memory released by the operation (MB); 0.0 when usage grew.
    memory_saved_mb: float = 0.0
    # Count accumulated from gc.collect() return values during the operation.
    gc_collections_triggered: int = 0
    # Names of the optimization techniques that were applied.
    optimization_techniques_applied: List[str] = field(default_factory=list)
    # Warning messages recorded while the operation ran.
    memory_warnings: List[str] = field(default_factory=list)
    # Whether the operation completed without raising.
    success: bool = True
73
+
74
+
75
class MemoryOptimizer:
    """
    Enterprise memory optimization system for large-scale CloudOps operations.

    Provides:
    - Real-time memory monitoring with alerting thresholds
    - Automatic garbage collection optimization
    - Memory-efficient data processing patterns
    - Resource cleanup automation
    - Memory leak detection and prevention
    """

    def __init__(self,
                 warning_threshold_mb: float = 1024,
                 critical_threshold_mb: float = 2048,
                 monitoring_interval_seconds: float = 5.0):
        """
        Initialize memory optimizer.

        Args:
            warning_threshold_mb: Memory warning threshold in MB
            critical_threshold_mb: Memory critical threshold in MB
            monitoring_interval_seconds: Memory monitoring check interval
        """
        self.warning_threshold_mb = warning_threshold_mb
        self.critical_threshold_mb = critical_threshold_mb
        self.monitoring_interval_seconds = monitoring_interval_seconds

        # Memory tracking state (also appended to by the monitor thread).
        self.snapshots: List[MemorySnapshot] = []
        self.metrics: List[MemoryOptimizationMetrics] = []
        self.monitoring_active = False
        self.monitoring_thread: Optional[threading.Thread] = None

        # Process handle for RSS readings; None when psutil is not installed.
        self.process = psutil.Process() if PSUTIL_AVAILABLE else None

        # Tune the garbage collector once at construction time.
        self._setup_gc_optimization()

        # Weak references only: tracking must not itself keep objects alive.
        self._tracked_objects: weakref.WeakSet = weakref.WeakSet()

    def _setup_gc_optimization(self):
        """Configure garbage collection optimization."""
        # NOTE(review): gc.set_threshold is process-global, so constructing this
        # class changes GC behavior for the whole interpreter.
        gc.set_threshold(700, 10, 10)  # More aggressive collection

        # Re-enable automatic collection in case it was disabled elsewhere.
        if not gc.isenabled():
            gc.enable()

        logger.debug("Memory optimization: Garbage collection configured")

    @contextmanager
    def optimize_memory_usage(self, operation_name: str, enable_monitoring: bool = True):
        """
        Context manager for memory-optimized operation execution.

        Yields a MemoryOptimizationMetrics record that is populated as the
        operation runs and appended to self.metrics on exit (success or error).

        Args:
            operation_name: Name of operation being optimized
            enable_monitoring: Whether to enable real-time monitoring
        """
        metrics = MemoryOptimizationMetrics(operation_name=operation_name)

        if enable_monitoring:
            self.start_memory_monitoring()

        # Record initial memory state for before/after comparison.
        initial_snapshot = self._take_memory_snapshot(operation_name)
        metrics.start_memory_mb = initial_snapshot.memory_mb

        try:
            console.log(f"[dim]🧠 Starting memory-optimized: {operation_name} (current: {initial_snapshot.memory_mb:.1f}MB)[/]")

            yield metrics

            # Record final state and calculate savings.
            final_snapshot = self._take_memory_snapshot(f"{operation_name}_end")
            metrics.end_memory_mb = final_snapshot.memory_mb
            # Peak is taken from the last 10 monitor snapshots when available.
            metrics.peak_memory_mb = max(s.peak_memory_mb for s in self.snapshots[-10:]) if self.snapshots else final_snapshot.memory_mb

            # A net decrease counts as memory saved by the operation.
            if metrics.start_memory_mb > 0:
                memory_change = metrics.end_memory_mb - metrics.start_memory_mb
                if memory_change < 0:
                    metrics.memory_saved_mb = abs(memory_change)
                    metrics.optimization_techniques_applied.append("memory_cleanup")

            metrics.success = True
            self._log_memory_results(metrics)

        except Exception as e:
            # Record state at failure, then attempt emergency cleanup.
            error_snapshot = self._take_memory_snapshot(f"{operation_name}_error")
            metrics.end_memory_mb = error_snapshot.memory_mb
            metrics.success = False

            # NOTE(review): gc.collect() returns the number of unreachable
            # objects found, not a collection count — the field name overstates.
            collected = gc.collect()
            metrics.gc_collections_triggered += collected
            metrics.optimization_techniques_applied.append("error_gc_cleanup")

            print_error(f"Memory optimization failed for {operation_name}", e)
            raise

        finally:
            # Always stop monitoring, store metrics, and release resources.
            if enable_monitoring:
                self.stop_memory_monitoring()

            self.metrics.append(metrics)
            self._cleanup_operation_resources()

    def start_memory_monitoring(self):
        """Start background memory monitoring (no-op if already running)."""
        if self.monitoring_active:
            return

        self.monitoring_active = True

        def monitor_memory():
            # Daemon loop: snapshot, check thresholds, trim history, sleep.
            while self.monitoring_active:
                try:
                    snapshot = self._take_memory_snapshot("monitoring")
                    self.snapshots.append(snapshot)

                    self._check_memory_thresholds(snapshot)

                    # Limit snapshot history to prevent unbounded growth.
                    if len(self.snapshots) > 1000:
                        self.snapshots = self.snapshots[-500:]  # Keep recent 500

                    time.sleep(self.monitoring_interval_seconds)

                except Exception as e:
                    logger.debug(f"Memory monitoring error: {e}")
                    break

        self.monitoring_thread = threading.Thread(target=monitor_memory, daemon=True)
        self.monitoring_thread.start()

        logger.debug("Memory monitoring started")

    def stop_memory_monitoring(self):
        """Stop background memory monitoring and join the worker thread."""
        self.monitoring_active = False

        if self.monitoring_thread and self.monitoring_thread.is_alive():
            # Bounded join: the thread may still be inside time.sleep().
            self.monitoring_thread.join(timeout=5.0)

        logger.debug("Memory monitoring stopped")

    def _take_memory_snapshot(self, operation_context: str) -> MemorySnapshot:
        """Take a memory usage snapshot.

        Prefers psutil RSS figures; falls back to tracemalloc, which reports
        Python-level allocations only and only while tracing is active.
        """
        snapshot = MemorySnapshot(operation_context=operation_context)

        if self.process and PSUTIL_AVAILABLE:
            try:
                memory_info = self.process.memory_info()
                snapshot.memory_mb = memory_info.rss / (1024 * 1024)
                # peak_wset only exists on Windows; fall back to current RSS.
                snapshot.peak_memory_mb = getattr(memory_info, 'peak_wset', memory_info.rss) / (1024 * 1024)

                try:
                    snapshot.memory_percent = self.process.memory_percent()
                except Exception:  # narrowed from a bare except; keep 0.0 fallback
                    snapshot.memory_percent = 0.0

            except Exception as e:
                logger.debug(f"Failed to get process memory info: {e}")
        else:
            # tracemalloc is imported at module level; the previous local
            # re-import was redundant and has been removed.
            if tracemalloc.is_tracing():
                current, peak = tracemalloc.get_traced_memory()
                snapshot.memory_mb = current / (1024 * 1024)
                snapshot.peak_memory_mb = peak / (1024 * 1024)

        # gc.get_count() already returns a (gen0, gen1, gen2) tuple.
        snapshot.gc_collections = gc.get_count()

        try:
            snapshot.active_objects = len(gc.get_objects())
        except Exception:  # narrowed from a bare except; best-effort count
            snapshot.active_objects = 0

        return snapshot

    def _check_memory_thresholds(self, snapshot: MemorySnapshot):
        """Check memory usage against thresholds and alert if needed."""
        if snapshot.memory_mb > self.critical_threshold_mb:
            console.log(f"[red]🚨 CRITICAL: Memory usage {snapshot.memory_mb:.1f}MB exceeds critical threshold {self.critical_threshold_mb}MB[/red]")

            # Force aggressive garbage collection at the critical level.
            collected = self._force_garbage_collection()
            console.log(f"[yellow]🗑️ Emergency GC collected {collected} objects[/yellow]")

        elif snapshot.memory_mb > self.warning_threshold_mb:
            console.log(f"[yellow]⚠️ WARNING: Memory usage {snapshot.memory_mb:.1f}MB exceeds warning threshold {self.warning_threshold_mb}MB[/yellow]")

    def _force_garbage_collection(self) -> int:
        """Force comprehensive garbage collection; returns total objects found."""
        total_collected = 0

        # Collect each generation individually for a thorough sweep.
        for generation in range(3):
            collected = gc.collect(generation)
            total_collected += collected

        gc.collect()  # Final full collection

        return total_collected

    def _cleanup_operation_resources(self):
        """Clean up operation-specific resources after an optimized run."""
        gc.collect()

        # Trim snapshot history if it has grown large between operations.
        if len(self.snapshots) > 100:
            self.snapshots = self.snapshots[-50:]  # Keep recent snapshots

    def _log_memory_results(self, metrics: MemoryOptimizationMetrics):
        """Log memory optimization results at the appropriate severity."""
        if metrics.memory_saved_mb > 0:
            print_success(f"Memory optimized for {metrics.operation_name}: "
                          f"saved {metrics.memory_saved_mb:.1f}MB "
                          f"({metrics.start_memory_mb:.1f}MB → {metrics.end_memory_mb:.1f}MB)")
        elif metrics.end_memory_mb <= metrics.start_memory_mb * 1.1:  # Within 10%
            console.log(f"[green]Memory stable for {metrics.operation_name}: "
                        f"{metrics.start_memory_mb:.1f}MB → {metrics.end_memory_mb:.1f}MB[/green]")
        else:
            console.log(f"[yellow]Memory increased for {metrics.operation_name}: "
                        f"{metrics.start_memory_mb:.1f}MB → {metrics.end_memory_mb:.1f}MB[/yellow]")

    def create_memory_efficient_iterator(self, data: List[Any], batch_size: int = 100) -> Iterator[List[Any]]:
        """
        Create memory-efficient iterator for large datasets.

        Args:
            data: Large dataset to process
            batch_size: Size of each batch to yield

        Yields:
            Batched data chunks for memory-efficient processing
        """
        for i in range(0, len(data), batch_size):
            batch = data[i:i + batch_size]
            yield batch

            # Trigger GC every 10 batches to prevent memory buildup.
            if i > 0 and (i // batch_size) % 10 == 0:
                gc.collect()

    def optimize_large_dict_processing(self, large_dict: Dict[str, Any],
                                       chunk_size: int = 1000) -> Iterator[Dict[str, Any]]:
        """
        Memory-efficient large dictionary processing.

        Args:
            large_dict: Large dictionary to process
            chunk_size: Number of items to process per chunk

        Yields:
            Dictionary chunks for processing
        """
        # NOTE(review): this materializes all items up front, so peak memory
        # briefly includes the full item list before chunking begins.
        items = list(large_dict.items())

        for i in range(0, len(items), chunk_size):
            chunk_items = items[i:i + chunk_size]
            chunk_dict = dict(chunk_items)

            yield chunk_dict

            # Drop local references so the chunk can be reclaimed promptly.
            del chunk_items, chunk_dict

            # Periodic GC every 5 chunks.
            if i > 0 and (i // chunk_size) % 5 == 0:
                gc.collect()

    def track_object_for_leaks(self, obj: Any, name: str = ""):
        """Track object for memory leak detection via a weak reference."""
        self._tracked_objects.add(obj)
        logger.debug(f"Tracking object for leaks: {name or type(obj).__name__}")

    def get_memory_usage_report(self) -> Dict[str, Any]:
        """Get comprehensive memory usage report as a plain dictionary."""
        current_snapshot = self._take_memory_snapshot("report_generation")

        # Statistics over the 20 most recent snapshots (or just the current one).
        recent_snapshots = self.snapshots[-20:] if self.snapshots else [current_snapshot]

        avg_memory = sum(s.memory_mb for s in recent_snapshots) / len(recent_snapshots)
        peak_memory = max(s.peak_memory_mb for s in recent_snapshots)

        return {
            "current_memory_mb": current_snapshot.memory_mb,
            "average_memory_mb": avg_memory,
            "peak_memory_mb": peak_memory,
            "memory_percent": current_snapshot.memory_percent,
            "warning_threshold_mb": self.warning_threshold_mb,
            "critical_threshold_mb": self.critical_threshold_mb,
            "active_objects": current_snapshot.active_objects,
            "gc_collections": current_snapshot.gc_collections,
            "tracked_objects": len(self._tracked_objects),
            "memory_status": self._get_memory_status(current_snapshot),
            "optimization_recommendations": self._get_optimization_recommendations(current_snapshot)
        }

    def _get_memory_status(self, snapshot: MemorySnapshot) -> str:
        """Classify current memory usage: good/moderate/warning/critical."""
        if snapshot.memory_mb > self.critical_threshold_mb:
            return "critical"
        elif snapshot.memory_mb > self.warning_threshold_mb:
            return "warning"
        elif snapshot.memory_mb > self.warning_threshold_mb * 0.8:
            return "moderate"
        else:
            return "good"

    def _get_optimization_recommendations(self, snapshot: MemorySnapshot) -> List[str]:
        """Generate memory optimization recommendations from a snapshot."""
        recommendations = []

        if snapshot.memory_mb > self.critical_threshold_mb:
            recommendations.append("Immediate garbage collection required")
            recommendations.append("Consider batch processing for large operations")

        if snapshot.memory_mb > self.warning_threshold_mb:
            recommendations.append("Monitor memory usage closely")
            recommendations.append("Implement streaming processing for large datasets")

        if snapshot.active_objects > 100000:
            recommendations.append("High object count detected - review object lifecycle")

        if len(self._tracked_objects) > 0:
            recommendations.append(f"{len(self._tracked_objects)} objects being tracked for leaks")

        return recommendations

    def create_memory_summary_table(self) -> None:
        """Display memory optimization summary in Rich table format."""
        if not self.metrics:
            console.print("[yellow]No memory optimization metrics available[/yellow]")
            return

        print_header("Memory Optimization Summary", "SRE Memory Management")

        table = create_table(
            title="Memory Optimization Results",
            columns=[
                {"name": "Operation", "style": "cyan", "justify": "left"},
                {"name": "Start (MB)", "style": "white", "justify": "right"},
                {"name": "Peak (MB)", "style": "white", "justify": "right"},
                {"name": "End (MB)", "style": "white", "justify": "right"},
                {"name": "Saved (MB)", "style": "green", "justify": "right"},
                {"name": "Optimizations", "style": "dim", "justify": "left", "max_width": 25},
                {"name": "Status", "style": "white", "justify": "center"}
            ]
        )

        for metrics in self.metrics:
            status_icon = STATUS_INDICATORS['success'] if metrics.success else STATUS_INDICATORS['error']
            status_color = 'green' if metrics.success else 'red'

            saved_text = f"+{metrics.memory_saved_mb:.1f}" if metrics.memory_saved_mb > 0 else "0.0"

            table.add_row(
                metrics.operation_name,
                f"{metrics.start_memory_mb:.1f}",
                f"{metrics.peak_memory_mb:.1f}",
                f"{metrics.end_memory_mb:.1f}",
                saved_text,
                # Show at most two technique names, eliding the rest.
                ", ".join(metrics.optimization_techniques_applied[:2]) + ("..." if len(metrics.optimization_techniques_applied) > 2 else ""),
                f"[{status_color}]{status_icon}[/]"
            )

        console.print(table)

        # Current memory status footer, colored by severity.
        report = self.get_memory_usage_report()
        status_color = {
            'good': 'green',
            'moderate': 'yellow',
            'warning': 'yellow',
            'critical': 'red'
        }.get(report['memory_status'], 'white')

        console.print(f"\n[{status_color}]Current Memory: {report['current_memory_mb']:.1f}MB ({report['memory_status'].upper()})[/{status_color}]")

    def clear_optimization_data(self):
        """Clear all optimization tracking data and force a collection."""
        self.snapshots.clear()
        self.metrics.clear()
        self._tracked_objects.clear()
        gc.collect()
        print_success("Memory optimization data cleared")
485
+
486
+
487
# Module-level singleton holding the shared optimizer instance.
_memory_optimizer: Optional[MemoryOptimizer] = None


def get_memory_optimizer(warning_threshold_mb: float = 1024,
                         critical_threshold_mb: float = 2048) -> MemoryOptimizer:
    """Return the shared MemoryOptimizer, creating it on first use.

    The threshold arguments only take effect on the very first call; every
    later call returns the already-constructed singleton unchanged.
    """
    global _memory_optimizer
    if _memory_optimizer is not None:
        return _memory_optimizer
    _memory_optimizer = MemoryOptimizer(
        warning_threshold_mb=warning_threshold_mb,
        critical_threshold_mb=critical_threshold_mb,
    )
    return _memory_optimizer
501
+
502
+
503
def create_memory_report():
    """Render the memory optimization summary for the global optimizer.

    Prints a notice instead when no optimizer has been created yet.
    """
    if _memory_optimizer is None:
        console.print("[yellow]No memory optimizer initialized[/yellow]")
        return
    _memory_optimizer.create_memory_summary_table()
509
+
510
+
511
# Memory optimization decorators
def memory_optimized(operation_name: str = None, enable_monitoring: bool = True):
    """Decorator for memory-optimized function execution.

    Args:
        operation_name: Reported operation name; defaults to
            "<module>.<qualname-less function name>" of the wrapped callable.
        enable_monitoring: Whether to run background memory monitoring while
            the wrapped function executes.

    Returns:
        A decorator that runs the target function inside
        MemoryOptimizer.optimize_memory_usage().
    """
    # Local import keeps the module's top-level import block untouched.
    from functools import wraps

    def decorator(func: Callable) -> Callable:
        @wraps(func)  # preserve __name__/__doc__/__wrapped__ of the target
        def wrapper(*args, **kwargs):
            optimizer = get_memory_optimizer()
            op_name = operation_name or f"{func.__module__}.{func.__name__}"

            with optimizer.optimize_memory_usage(op_name, enable_monitoring):
                return func(*args, **kwargs)
        return wrapper
    return decorator
523
+
524
+
525
# Export public interface: names re-exported by
# `from runbooks.common.memory_optimization import *`.
__all__ = [
    "MemoryOptimizer",
    "MemorySnapshot",
    "MemoryOptimizationMetrics",
    "get_memory_optimizer",
    "create_memory_report",
    "memory_optimized"
]