runbooks 0.7.9-py3-none-any.whl → 0.9.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122)
  1. runbooks/__init__.py +1 -1
  2. runbooks/cfat/README.md +12 -1
  3. runbooks/cfat/__init__.py +1 -1
  4. runbooks/cfat/assessment/compliance.py +4 -1
  5. runbooks/cfat/assessment/runner.py +42 -34
  6. runbooks/cfat/models.py +1 -1
  7. runbooks/cloudops/__init__.py +123 -0
  8. runbooks/cloudops/base.py +385 -0
  9. runbooks/cloudops/cost_optimizer.py +811 -0
  10. runbooks/cloudops/infrastructure_optimizer.py +29 -0
  11. runbooks/cloudops/interfaces.py +828 -0
  12. runbooks/cloudops/lifecycle_manager.py +29 -0
  13. runbooks/cloudops/mcp_cost_validation.py +678 -0
  14. runbooks/cloudops/models.py +251 -0
  15. runbooks/cloudops/monitoring_automation.py +29 -0
  16. runbooks/cloudops/notebook_framework.py +676 -0
  17. runbooks/cloudops/security_enforcer.py +449 -0
  18. runbooks/common/__init__.py +152 -0
  19. runbooks/common/accuracy_validator.py +1039 -0
  20. runbooks/common/context_logger.py +440 -0
  21. runbooks/common/cross_module_integration.py +594 -0
  22. runbooks/common/enhanced_exception_handler.py +1108 -0
  23. runbooks/common/enterprise_audit_integration.py +634 -0
  24. runbooks/common/mcp_cost_explorer_integration.py +900 -0
  25. runbooks/common/mcp_integration.py +548 -0
  26. runbooks/common/performance_monitor.py +387 -0
  27. runbooks/common/profile_utils.py +216 -0
  28. runbooks/common/rich_utils.py +172 -1
  29. runbooks/feedback/user_feedback_collector.py +440 -0
  30. runbooks/finops/README.md +377 -458
  31. runbooks/finops/__init__.py +4 -21
  32. runbooks/finops/account_resolver.py +279 -0
  33. runbooks/finops/accuracy_cross_validator.py +638 -0
  34. runbooks/finops/aws_client.py +721 -36
  35. runbooks/finops/budget_integration.py +313 -0
  36. runbooks/finops/cli.py +59 -5
  37. runbooks/finops/cost_optimizer.py +1340 -0
  38. runbooks/finops/cost_processor.py +211 -37
  39. runbooks/finops/dashboard_router.py +900 -0
  40. runbooks/finops/dashboard_runner.py +990 -232
  41. runbooks/finops/embedded_mcp_validator.py +288 -0
  42. runbooks/finops/enhanced_dashboard_runner.py +8 -7
  43. runbooks/finops/enhanced_progress.py +327 -0
  44. runbooks/finops/enhanced_trend_visualization.py +423 -0
  45. runbooks/finops/finops_dashboard.py +184 -1829
  46. runbooks/finops/helpers.py +509 -196
  47. runbooks/finops/iam_guidance.py +400 -0
  48. runbooks/finops/markdown_exporter.py +466 -0
  49. runbooks/finops/multi_dashboard.py +1502 -0
  50. runbooks/finops/optimizer.py +15 -15
  51. runbooks/finops/profile_processor.py +2 -2
  52. runbooks/finops/runbooks.inventory.organizations_discovery.log +0 -0
  53. runbooks/finops/runbooks.security.report_generator.log +0 -0
  54. runbooks/finops/runbooks.security.run_script.log +0 -0
  55. runbooks/finops/runbooks.security.security_export.log +0 -0
  56. runbooks/finops/schemas.py +589 -0
  57. runbooks/finops/service_mapping.py +195 -0
  58. runbooks/finops/single_dashboard.py +710 -0
  59. runbooks/finops/tests/test_reference_images_validation.py +1 -1
  60. runbooks/inventory/README.md +12 -1
  61. runbooks/inventory/core/collector.py +157 -29
  62. runbooks/inventory/list_ec2_instances.py +9 -6
  63. runbooks/inventory/list_ssm_parameters.py +10 -10
  64. runbooks/inventory/organizations_discovery.py +210 -164
  65. runbooks/inventory/rich_inventory_display.py +74 -107
  66. runbooks/inventory/run_on_multi_accounts.py +13 -13
  67. runbooks/inventory/runbooks.inventory.organizations_discovery.log +0 -0
  68. runbooks/inventory/runbooks.security.security_export.log +0 -0
  69. runbooks/main.py +1371 -240
  70. runbooks/metrics/dora_metrics_engine.py +711 -17
  71. runbooks/monitoring/performance_monitor.py +433 -0
  72. runbooks/operate/README.md +394 -0
  73. runbooks/operate/base.py +215 -47
  74. runbooks/operate/ec2_operations.py +435 -5
  75. runbooks/operate/iam_operations.py +598 -3
  76. runbooks/operate/privatelink_operations.py +1 -1
  77. runbooks/operate/rds_operations.py +508 -0
  78. runbooks/operate/s3_operations.py +508 -0
  79. runbooks/operate/vpc_endpoints.py +1 -1
  80. runbooks/remediation/README.md +489 -13
  81. runbooks/remediation/base.py +5 -3
  82. runbooks/remediation/commons.py +8 -4
  83. runbooks/security/ENTERPRISE_SECURITY_FRAMEWORK.md +506 -0
  84. runbooks/security/README.md +12 -1
  85. runbooks/security/__init__.py +265 -33
  86. runbooks/security/cloudops_automation_security_validator.py +1164 -0
  87. runbooks/security/compliance_automation.py +12 -10
  88. runbooks/security/compliance_automation_engine.py +1021 -0
  89. runbooks/security/enterprise_security_framework.py +930 -0
  90. runbooks/security/enterprise_security_policies.json +293 -0
  91. runbooks/security/executive_security_dashboard.py +1247 -0
  92. runbooks/security/integration_test_enterprise_security.py +879 -0
  93. runbooks/security/module_security_integrator.py +641 -0
  94. runbooks/security/multi_account_security_controls.py +2254 -0
  95. runbooks/security/real_time_security_monitor.py +1196 -0
  96. runbooks/security/report_generator.py +1 -1
  97. runbooks/security/run_script.py +4 -8
  98. runbooks/security/security_baseline_tester.py +39 -52
  99. runbooks/security/security_export.py +99 -120
  100. runbooks/sre/README.md +472 -0
  101. runbooks/sre/__init__.py +33 -0
  102. runbooks/sre/mcp_reliability_engine.py +1049 -0
  103. runbooks/sre/performance_optimization_engine.py +1032 -0
  104. runbooks/sre/production_monitoring_framework.py +584 -0
  105. runbooks/sre/reliability_monitoring_framework.py +1011 -0
  106. runbooks/validation/__init__.py +2 -2
  107. runbooks/validation/benchmark.py +154 -149
  108. runbooks/validation/cli.py +159 -147
  109. runbooks/validation/mcp_validator.py +291 -248
  110. runbooks/vpc/README.md +478 -0
  111. runbooks/vpc/__init__.py +2 -2
  112. runbooks/vpc/manager_interface.py +366 -351
  113. runbooks/vpc/networking_wrapper.py +68 -36
  114. runbooks/vpc/rich_formatters.py +22 -8
  115. runbooks-0.9.1.dist-info/METADATA +308 -0
  116. {runbooks-0.7.9.dist-info → runbooks-0.9.1.dist-info}/RECORD +120 -59
  117. {runbooks-0.7.9.dist-info → runbooks-0.9.1.dist-info}/entry_points.txt +1 -1
  118. runbooks/finops/cross_validation.py +0 -375
  119. runbooks-0.7.9.dist-info/METADATA +0 -636
  120. {runbooks-0.7.9.dist-info → runbooks-0.9.1.dist-info}/WHEEL +0 -0
  121. {runbooks-0.7.9.dist-info → runbooks-0.9.1.dist-info}/licenses/LICENSE +0 -0
  122. {runbooks-0.7.9.dist-info → runbooks-0.9.1.dist-info}/top_level.txt +0 -0
runbooks/monitoring/performance_monitor.py (new file)
@@ -0,0 +1,433 @@
#!/usr/bin/env python3
"""
Performance Monitoring Framework for CloudOps Runbooks Platform.

Monitors performance metrics, tracks SLA compliance, and generates
alerts for performance degradation across all enhanced modules.

Features:
- Real-time performance tracking
- SLA compliance monitoring
- Automated alerting system
- Dashboard metrics collection
- User experience analytics

Author: Enterprise Product Owner
Version: 1.0.0 - Phase 2 Production Deployment
"""

import json
import logging
import time
from datetime import datetime, timedelta
from pathlib import Path
from typing import Any, Dict, List, Optional

import psutil
from rich.console import Console
from rich.panel import Panel
from rich.progress import Progress, SpinnerColumn, TextColumn, TimeElapsedColumn
from rich.table import Table

console = Console()


class PerformanceMonitor:
    """
    Enterprise performance monitoring for CloudOps Runbooks platform.

    Tracks performance metrics, SLA compliance, and user experience
    across all enhanced modules (operate, cfat, inventory, security, finops).
    """

    def __init__(self):
        """Initialize performance monitoring framework."""
        self.metrics_file = Path("artifacts/monitoring/performance_metrics.json")
        self.metrics_file.parent.mkdir(parents=True, exist_ok=True)

        # Performance targets from Phase 2 requirements
        self.performance_targets = {
            "operate": {
                "target_time": 2.0,  # <2s for operations
                "alert_threshold": 3.0,
                "description": "Resource Operations",
            },
            "cfat": {
                "target_time": 30.0,  # <30s for assessments
                "alert_threshold": 45.0,
                "description": "Cloud Foundations Assessment",
            },
            "inventory": {
                "target_time": 45.0,  # <45s for multi-account
                "alert_threshold": 60.0,
                "description": "Multi-Account Discovery",
            },
            "security": {
                "target_time": 15.0,  # <15s for security baseline
                "alert_threshold": 20.0,
                "description": "Security Baseline Assessment",
            },
            "finops": {
                "target_time": 60.0,  # <60s for cost analysis
                "alert_threshold": 90.0,
                "description": "FinOps Dashboard",
            },
        }

        self.logger = self._setup_logging()

    def _setup_logging(self) -> logging.Logger:
        """Setup performance monitoring logging."""
        logger = logging.getLogger("performance_monitor")
        logger.setLevel(logging.INFO)

        handler = logging.FileHandler("artifacts/monitoring/performance.log")
        formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
        handler.setFormatter(formatter)
        logger.addHandler(handler)

        return logger

    def track_operation(
        self, module: str, operation: str, execution_time: float, success: bool = True, metadata: Optional[Dict] = None
    ) -> Dict[str, Any]:
        """
        Track performance metrics for a specific operation.

        Args:
            module: Module name (operate, cfat, etc.)
            operation: Operation name
            execution_time: Time taken in seconds
            success: Whether operation succeeded
            metadata: Additional context data

        Returns:
            Performance analysis results
        """
        timestamp = datetime.now().isoformat()

        # Performance analysis
        target = self.performance_targets.get(module, {})
        target_time = target.get("target_time", 30.0)
        alert_threshold = target.get("alert_threshold", 60.0)

        performance_status = "EXCELLENT"
        if execution_time <= target_time:
            performance_status = "EXCELLENT"
            status_color = "green"
        elif execution_time <= alert_threshold:
            performance_status = "ACCEPTABLE"
            status_color = "yellow"
        else:
            performance_status = "DEGRADED"
            status_color = "red"

        # Calculate performance score
        performance_score = min(100, max(0, 100 - (execution_time / target_time - 1) * 50))

        metric_data = {
            "timestamp": timestamp,
            "module": module,
            "operation": operation,
            "execution_time": execution_time,
            "target_time": target_time,
            "performance_status": performance_status,
            "performance_score": performance_score,
            "success": success,
            "metadata": metadata or {},
        }

        # Store metric
        self._store_metric(metric_data)

        # Display real-time feedback
        console.print(
            f"[{status_color}]{performance_status}[/{status_color}] "
            f"{module}.{operation}: {execution_time:.2f}s "
            f"(target: {target_time}s, score: {performance_score:.1f}%)"
        )

        # Generate alert if performance degraded
        if performance_status == "DEGRADED":
            self._generate_performance_alert(module, operation, execution_time, target_time)

        return metric_data

    def _store_metric(self, metric_data: Dict[str, Any]) -> None:
        """Store performance metric to persistent storage."""
        try:
            # Load existing metrics
            if self.metrics_file.exists():
                with open(self.metrics_file, "r") as f:
                    metrics = json.load(f)
            else:
                metrics = {"performance_data": []}

            # Add new metric
            metrics["performance_data"].append(metric_data)

            # Keep only last 1000 metrics to prevent file bloat
            if len(metrics["performance_data"]) > 1000:
                metrics["performance_data"] = metrics["performance_data"][-1000:]

            # Save updated metrics
            with open(self.metrics_file, "w") as f:
                json.dump(metrics, f, indent=2)

        except Exception as e:
            self.logger.error(f"Failed to store metric: {e}")

    def _generate_performance_alert(
        self, module: str, operation: str, execution_time: float, target_time: float
    ) -> None:
        """Generate performance degradation alert."""
        alert_message = (
            f"PERFORMANCE ALERT: {module}.{operation} "
            f"execution time {execution_time:.2f}s exceeds target {target_time:.2f}s"
        )

        console.print(f"[red]⚠️ {alert_message}[/red]")
        self.logger.warning(alert_message)

        # Store alert for dashboard
        alert_data = {
            "timestamp": datetime.now().isoformat(),
            "type": "performance_degradation",
            "module": module,
            "operation": operation,
            "execution_time": execution_time,
            "target_time": target_time,
            "severity": "HIGH",
        }

        alerts_file = Path("artifacts/monitoring/performance_alerts.json")
        alerts_file.parent.mkdir(parents=True, exist_ok=True)

        try:
            if alerts_file.exists():
                with open(alerts_file, "r") as f:
                    alerts = json.load(f)
            else:
                alerts = {"alerts": []}

            alerts["alerts"].append(alert_data)

            with open(alerts_file, "w") as f:
                json.dump(alerts, f, indent=2)

        except Exception as e:
            self.logger.error(f"Failed to store alert: {e}")

    def get_performance_dashboard(self, hours: int = 24) -> Dict[str, Any]:
        """
        Generate performance dashboard data for specified time window.

        Args:
            hours: Number of hours to analyze

        Returns:
            Dashboard metrics and analysis
        """
        if not self.metrics_file.exists():
            return {"status": "no_data", "message": "No performance data available"}

        try:
            with open(self.metrics_file, "r") as f:
                data = json.load(f)

            metrics = data.get("performance_data", [])

            # Filter by time window
            cutoff_time = datetime.now() - timedelta(hours=hours)
            recent_metrics = [m for m in metrics if datetime.fromisoformat(m["timestamp"]) >= cutoff_time]

            if not recent_metrics:
                return {"status": "no_recent_data", "message": f"No data in last {hours} hours"}

            # Analyze performance by module
            module_stats = {}
            for module in self.performance_targets.keys():
                module_metrics = [m for m in recent_metrics if m["module"] == module]

                if module_metrics:
                    execution_times = [m["execution_time"] for m in module_metrics]
                    success_count = sum(1 for m in module_metrics if m["success"])

                    module_stats[module] = {
                        "operations_count": len(module_metrics),
                        "success_rate": success_count / len(module_metrics) * 100,
                        "avg_execution_time": sum(execution_times) / len(execution_times),
                        "min_execution_time": min(execution_times),
                        "max_execution_time": max(execution_times),
                        "target_time": self.performance_targets[module]["target_time"],
                        "sla_compliance": sum(
                            1 for t in execution_times if t <= self.performance_targets[module]["target_time"]
                        )
                        / len(execution_times)
                        * 100,
                    }

            # Overall system health
            total_operations = len(recent_metrics)
            total_success = sum(1 for m in recent_metrics if m["success"])
            avg_score = sum(m["performance_score"] for m in recent_metrics) / total_operations

            dashboard = {
                "status": "success",
                "time_window_hours": hours,
                "generated_at": datetime.now().isoformat(),
                "overall_metrics": {
                    "total_operations": total_operations,
                    "success_rate": total_success / total_operations * 100,
                    "average_performance_score": avg_score,
                    "health_status": "EXCELLENT"
                    if avg_score >= 90
                    else "GOOD"
                    if avg_score >= 80
                    else "FAIR"
                    if avg_score >= 70
                    else "POOR",
                },
                "module_performance": module_stats,
            }

            return dashboard

        except Exception as e:
            self.logger.error(f"Failed to generate dashboard: {e}")
            return {"status": "error", "message": str(e)}

    def display_performance_dashboard(self, hours: int = 24) -> None:
        """Display formatted performance dashboard."""
        dashboard = self.get_performance_dashboard(hours)

        if dashboard["status"] != "success":
            console.print(f"[yellow]⚠️ {dashboard['message']}[/yellow]")
            return

        overall = dashboard["overall_metrics"]
        modules = dashboard["module_performance"]

        # Overall performance panel
        overall_panel = Panel(
            f"[green]Total Operations:[/green] {overall['total_operations']}\n"
            f"[blue]Success Rate:[/blue] {overall['success_rate']:.1f}%\n"
            f"[cyan]Performance Score:[/cyan] {overall['average_performance_score']:.1f}/100\n"
            f"[bold]Health Status:[/bold] {overall['health_status']}",
            title=f"📊 System Performance ({hours}h window)",
            border_style="green" if overall["health_status"] == "EXCELLENT" else "yellow",
        )

        console.print(overall_panel)

        # Module performance table
        if modules:
            table = Table(title="Module Performance Breakdown")
            table.add_column("Module", style="bold")
            table.add_column("Operations", justify="center")
            table.add_column("Success Rate", justify="center")
            table.add_column("Avg Time", justify="center")
            table.add_column("Target", justify="center")
            table.add_column("SLA Compliance", justify="center")
            table.add_column("Status")

            for module, stats in modules.items():
                status_color = (
                    "green" if stats["sla_compliance"] >= 95 else "yellow" if stats["sla_compliance"] >= 90 else "red"
                )

                status = (
                    "✅ EXCELLENT"
                    if stats["sla_compliance"] >= 95
                    else "⚠️ ACCEPTABLE"
                    if stats["sla_compliance"] >= 90
                    else "❌ DEGRADED"
                )

                table.add_row(
                    module.title(),
                    str(stats["operations_count"]),
                    f"{stats['success_rate']:.1f}%",
                    f"{stats['avg_execution_time']:.2f}s",
                    f"{stats['target_time']:.1f}s",
                    f"{stats['sla_compliance']:.1f}%",
                    f"[{status_color}]{status}[/{status_color}]",
                )

            console.print(table)

    def generate_monitoring_report(self) -> str:
        """Generate comprehensive monitoring report for stakeholders."""
        dashboard = self.get_performance_dashboard(24)  # Last 24 hours

        if dashboard["status"] != "success":
            return f"Monitoring Report: {dashboard['message']}"

        report_lines = [
            "# CloudOps Runbooks Performance Report",
            f"**Generated:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
            f"**Time Window:** 24 hours",
            "",
            "## Executive Summary",
            f"- **Total Operations:** {dashboard['overall_metrics']['total_operations']}",
            f"- **Success Rate:** {dashboard['overall_metrics']['success_rate']:.1f}%",
            f"- **Performance Score:** {dashboard['overall_metrics']['average_performance_score']:.1f}/100",
            f"- **System Health:** {dashboard['overall_metrics']['health_status']}",
            "",
            "## Module Performance",
        ]

        for module, stats in dashboard["module_performance"].items():
            status = (
                "🟢 Excellent"
                if stats["sla_compliance"] >= 95
                else "🟡 Acceptable"
                if stats["sla_compliance"] >= 90
                else "🔴 Degraded"
            )

            report_lines.extend(
                [
                    f"### {module.title()}",
                    f"- Operations: {stats['operations_count']}",
                    f"- Average Time: {stats['avg_execution_time']:.2f}s (target: {stats['target_time']}s)",
                    f"- SLA Compliance: {stats['sla_compliance']:.1f}%",
                    f"- Status: {status}",
                    "",
                ]
            )

        return "\n".join(report_lines)


# Usage examples and testing
if __name__ == "__main__":
    monitor = PerformanceMonitor()

    # Example usage - tracking operations
    console.print("[bold blue]🔍 CloudOps Runbooks Performance Monitor[/bold blue]")
    console.print("Tracking sample operations...")

    # Simulate some operations
    import random

    modules = ["operate", "cfat", "inventory", "security", "finops"]
    operations = ["start", "assess", "collect", "scan", "analyze"]

    for i in range(5):
        module = random.choice(modules)
        operation = random.choice(operations)
        # Simulate execution time - mostly good performance with occasional slow operations
        exec_time = random.uniform(0.5, 2.0) if random.random() > 0.1 else random.uniform(10, 50)
        success = random.random() > 0.05  # 95% success rate

        monitor.track_operation(module, operation, exec_time, success)
        time.sleep(0.1)  # Brief pause

    # Display dashboard
    console.print("\n")
    monitor.display_performance_dashboard()

    # Generate report
    report = monitor.generate_monitoring_report()
    console.print(f"\n[dim]{report}[/dim]")
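
A note on the clamped scoring formula in track_operation: any run at or under the module's target time scores 100, and the score then drops by 50 points for each additional multiple of the target, flooring at 0. A quick worked check against the finops target of 60s (standalone arithmetic, not output from the package):

def performance_score(execution_time: float, target_time: float) -> float:
    # Same clamped formula as PerformanceMonitor.track_operation
    return min(100, max(0, 100 - (execution_time / target_time - 1) * 50))

assert performance_score(45.0, 60.0) == 100   # under target: clamped to 100
assert performance_score(90.0, 60.0) == 75.0  # 1.5x target: 100 - 0.5 * 50
assert performance_score(180.0, 60.0) == 0    # 3x target: floored at 0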
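Neither this diff nor the package metadata shows how the enhanced modules are expected to call into the monitor, so the following is a minimal integration sketch placed after the file to keep the diff above intact. The timed context manager and the simulated workload are hypothetical; PerformanceMonitor, its module path, and the track_operation signature come from the file above.

import time
from contextlib import contextmanager

from runbooks.monitoring.performance_monitor import PerformanceMonitor

monitor = PerformanceMonitor()


@contextmanager
def timed(module: str, operation: str):
    """Hypothetical helper: time a block and report it via track_operation."""
    start = time.perf_counter()
    success = True
    try:
        yield
    except Exception:
        success = False
        raise
    finally:
        monitor.track_operation(module, operation, time.perf_counter() - start, success=success)


# Record one simulated inventory collection run against its 45s target.
with timed("inventory", "collect"):
    time.sleep(0.2)  # stand-in for real work

Because track_operation runs in the finally block, a failed operation is still recorded with success=False before the exception propagates.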