runbooks 0.7.9-py3-none-any.whl → 0.9.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122)
  1. runbooks/__init__.py +1 -1
  2. runbooks/cfat/README.md +12 -1
  3. runbooks/cfat/__init__.py +1 -1
  4. runbooks/cfat/assessment/compliance.py +4 -1
  5. runbooks/cfat/assessment/runner.py +42 -34
  6. runbooks/cfat/models.py +1 -1
  7. runbooks/cloudops/__init__.py +123 -0
  8. runbooks/cloudops/base.py +385 -0
  9. runbooks/cloudops/cost_optimizer.py +811 -0
  10. runbooks/cloudops/infrastructure_optimizer.py +29 -0
  11. runbooks/cloudops/interfaces.py +828 -0
  12. runbooks/cloudops/lifecycle_manager.py +29 -0
  13. runbooks/cloudops/mcp_cost_validation.py +678 -0
  14. runbooks/cloudops/models.py +251 -0
  15. runbooks/cloudops/monitoring_automation.py +29 -0
  16. runbooks/cloudops/notebook_framework.py +676 -0
  17. runbooks/cloudops/security_enforcer.py +449 -0
  18. runbooks/common/__init__.py +152 -0
  19. runbooks/common/accuracy_validator.py +1039 -0
  20. runbooks/common/context_logger.py +440 -0
  21. runbooks/common/cross_module_integration.py +594 -0
  22. runbooks/common/enhanced_exception_handler.py +1108 -0
  23. runbooks/common/enterprise_audit_integration.py +634 -0
  24. runbooks/common/mcp_cost_explorer_integration.py +900 -0
  25. runbooks/common/mcp_integration.py +548 -0
  26. runbooks/common/performance_monitor.py +387 -0
  27. runbooks/common/profile_utils.py +216 -0
  28. runbooks/common/rich_utils.py +172 -1
  29. runbooks/feedback/user_feedback_collector.py +440 -0
  30. runbooks/finops/README.md +377 -458
  31. runbooks/finops/__init__.py +4 -21
  32. runbooks/finops/account_resolver.py +279 -0
  33. runbooks/finops/accuracy_cross_validator.py +638 -0
  34. runbooks/finops/aws_client.py +721 -36
  35. runbooks/finops/budget_integration.py +313 -0
  36. runbooks/finops/cli.py +59 -5
  37. runbooks/finops/cost_optimizer.py +1340 -0
  38. runbooks/finops/cost_processor.py +211 -37
  39. runbooks/finops/dashboard_router.py +900 -0
  40. runbooks/finops/dashboard_runner.py +990 -232
  41. runbooks/finops/embedded_mcp_validator.py +288 -0
  42. runbooks/finops/enhanced_dashboard_runner.py +8 -7
  43. runbooks/finops/enhanced_progress.py +327 -0
  44. runbooks/finops/enhanced_trend_visualization.py +423 -0
  45. runbooks/finops/finops_dashboard.py +184 -1829
  46. runbooks/finops/helpers.py +509 -196
  47. runbooks/finops/iam_guidance.py +400 -0
  48. runbooks/finops/markdown_exporter.py +466 -0
  49. runbooks/finops/multi_dashboard.py +1502 -0
  50. runbooks/finops/optimizer.py +15 -15
  51. runbooks/finops/profile_processor.py +2 -2
  52. runbooks/finops/runbooks.inventory.organizations_discovery.log +0 -0
  53. runbooks/finops/runbooks.security.report_generator.log +0 -0
  54. runbooks/finops/runbooks.security.run_script.log +0 -0
  55. runbooks/finops/runbooks.security.security_export.log +0 -0
  56. runbooks/finops/schemas.py +589 -0
  57. runbooks/finops/service_mapping.py +195 -0
  58. runbooks/finops/single_dashboard.py +710 -0
  59. runbooks/finops/tests/test_reference_images_validation.py +1 -1
  60. runbooks/inventory/README.md +12 -1
  61. runbooks/inventory/core/collector.py +157 -29
  62. runbooks/inventory/list_ec2_instances.py +9 -6
  63. runbooks/inventory/list_ssm_parameters.py +10 -10
  64. runbooks/inventory/organizations_discovery.py +210 -164
  65. runbooks/inventory/rich_inventory_display.py +74 -107
  66. runbooks/inventory/run_on_multi_accounts.py +13 -13
  67. runbooks/inventory/runbooks.inventory.organizations_discovery.log +0 -0
  68. runbooks/inventory/runbooks.security.security_export.log +0 -0
  69. runbooks/main.py +1371 -240
  70. runbooks/metrics/dora_metrics_engine.py +711 -17
  71. runbooks/monitoring/performance_monitor.py +433 -0
  72. runbooks/operate/README.md +394 -0
  73. runbooks/operate/base.py +215 -47
  74. runbooks/operate/ec2_operations.py +435 -5
  75. runbooks/operate/iam_operations.py +598 -3
  76. runbooks/operate/privatelink_operations.py +1 -1
  77. runbooks/operate/rds_operations.py +508 -0
  78. runbooks/operate/s3_operations.py +508 -0
  79. runbooks/operate/vpc_endpoints.py +1 -1
  80. runbooks/remediation/README.md +489 -13
  81. runbooks/remediation/base.py +5 -3
  82. runbooks/remediation/commons.py +8 -4
  83. runbooks/security/ENTERPRISE_SECURITY_FRAMEWORK.md +506 -0
  84. runbooks/security/README.md +12 -1
  85. runbooks/security/__init__.py +265 -33
  86. runbooks/security/cloudops_automation_security_validator.py +1164 -0
  87. runbooks/security/compliance_automation.py +12 -10
  88. runbooks/security/compliance_automation_engine.py +1021 -0
  89. runbooks/security/enterprise_security_framework.py +930 -0
  90. runbooks/security/enterprise_security_policies.json +293 -0
  91. runbooks/security/executive_security_dashboard.py +1247 -0
  92. runbooks/security/integration_test_enterprise_security.py +879 -0
  93. runbooks/security/module_security_integrator.py +641 -0
  94. runbooks/security/multi_account_security_controls.py +2254 -0
  95. runbooks/security/real_time_security_monitor.py +1196 -0
  96. runbooks/security/report_generator.py +1 -1
  97. runbooks/security/run_script.py +4 -8
  98. runbooks/security/security_baseline_tester.py +39 -52
  99. runbooks/security/security_export.py +99 -120
  100. runbooks/sre/README.md +472 -0
  101. runbooks/sre/__init__.py +33 -0
  102. runbooks/sre/mcp_reliability_engine.py +1049 -0
  103. runbooks/sre/performance_optimization_engine.py +1032 -0
  104. runbooks/sre/production_monitoring_framework.py +584 -0
  105. runbooks/sre/reliability_monitoring_framework.py +1011 -0
  106. runbooks/validation/__init__.py +2 -2
  107. runbooks/validation/benchmark.py +154 -149
  108. runbooks/validation/cli.py +159 -147
  109. runbooks/validation/mcp_validator.py +291 -248
  110. runbooks/vpc/README.md +478 -0
  111. runbooks/vpc/__init__.py +2 -2
  112. runbooks/vpc/manager_interface.py +366 -351
  113. runbooks/vpc/networking_wrapper.py +68 -36
  114. runbooks/vpc/rich_formatters.py +22 -8
  115. runbooks-0.9.1.dist-info/METADATA +308 -0
  116. {runbooks-0.7.9.dist-info → runbooks-0.9.1.dist-info}/RECORD +120 -59
  117. {runbooks-0.7.9.dist-info → runbooks-0.9.1.dist-info}/entry_points.txt +1 -1
  118. runbooks/finops/cross_validation.py +0 -375
  119. runbooks-0.7.9.dist-info/METADATA +0 -636
  120. {runbooks-0.7.9.dist-info → runbooks-0.9.1.dist-info}/WHEEL +0 -0
  121. {runbooks-0.7.9.dist-info → runbooks-0.9.1.dist-info}/licenses/LICENSE +0 -0
  122. {runbooks-0.7.9.dist-info → runbooks-0.9.1.dist-info}/top_level.txt +0 -0
runbooks/validation/__init__.py

@@ -5,6 +5,6 @@ Provides comprehensive validation between runbooks outputs and MCP server result
 for enterprise AWS operations with 99.5% accuracy target.
 """
 
-from .mcp_validator import MCPValidator, ValidationResult, ValidationReport, ValidationStatus
+from .mcp_validator import MCPValidator, ValidationReport, ValidationResult, ValidationStatus
 
-__all__ = ['MCPValidator', 'ValidationResult', 'ValidationReport', 'ValidationStatus']
+__all__ = ["MCPValidator", "ValidationResult", "ValidationReport", "ValidationStatus"]
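
The re-exported names above are the package's public validation surface. As orientation before the benchmark.py changes below, here is a minimal sketch of driving that API directly; it relies only on the constructor keywords and the validate_all_operations() coroutine that appear later in this diff, so treat the exact signatures and report fields as assumptions drawn from those lines rather than documented usage.

import asyncio

from runbooks.validation import MCPValidator


async def check() -> None:
    # Keyword arguments mirror those used by benchmark.py below (assumed defaults).
    validator = MCPValidator(tolerance_percentage=5.0, performance_target_seconds=30.0)
    report = await validator.validate_all_operations()
    # Report fields are taken from the BenchmarkResult construction shown further down.
    print(f"{report.passed_validations}/{report.total_validations} passed, "
          f"{report.overall_accuracy:.1f}% accuracy")


asyncio.run(check())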
runbooks/validation/benchmark.py

@@ -14,26 +14,28 @@ Usage:
 """
 
 import asyncio
-import time
+import json
 import statistics
-from datetime import datetime
-from typing import List, Dict, Any
+import time
 from dataclasses import dataclass
-import json
+from datetime import datetime
+from typing import Any, Dict, List
 
+from rich import box
 from rich.console import Console
-from rich.table import Table
 from rich.panel import Panel
 from rich.progress import Progress, TaskID
-from rich import box
+from rich.table import Table
 
 from .mcp_validator import MCPValidator, ValidationReport, ValidationStatus
 
 console = Console()
 
+
 @dataclass
 class BenchmarkResult:
     """Individual benchmark iteration result."""
+
     iteration: int
     accuracy: float
     execution_time: float
@@ -42,14 +44,16 @@ class BenchmarkResult:
     timestamp: datetime
     details: ValidationReport
 
+
 @dataclass
 class BenchmarkSuite:
     """Complete benchmark suite results."""
+
     target_accuracy: float
     performance_target: float
     iterations: int
     results: List[BenchmarkResult]
-
+
     # Summary statistics
     avg_accuracy: float
     avg_execution_time: float
@@ -60,95 +64,96 @@ class BenchmarkSuite:
     accuracy_std_dev: float
     time_std_dev: float
     success_rate: float
-
+
     # SRE metrics
     availability: float  # % of successful validations
     reliability_score: float  # Combined accuracy + performance
     performance_consistency: float  # Low variance = high consistency
 
+
 class MCPBenchmarkRunner:
     """
     Enterprise benchmark runner for MCP validation framework.
-
+
     Provides comprehensive performance testing with SRE reliability metrics
     and enterprise reporting for production deployment validation.
     """
-
-    def __init__(self,
-                 target_accuracy: float = 99.5,
-                 performance_target: float = 30.0,
-                 tolerance_percentage: float = 5.0):
+
+    def __init__(
+        self, target_accuracy: float = 99.5, performance_target: float = 30.0, tolerance_percentage: float = 5.0
+    ):
         """Initialize benchmark runner."""
-
+
         self.target_accuracy = target_accuracy
         self.performance_target = performance_target
         self.tolerance_percentage = tolerance_percentage
-
-        console.print(Panel(
-            f"[bold blue]MCP Validation Benchmark Suite[/bold blue]\n"
-            f"Target Accuracy: {target_accuracy}%\n"
-            f"Performance Target: <{performance_target}s\n"
-            f"Tolerance: ±{tolerance_percentage}%",
-            title="Enterprise Benchmark Framework"
-        ))
-
+
+        console.print(
+            Panel(
+                f"[bold blue]MCP Validation Benchmark Suite[/bold blue]\n"
+                f"Target Accuracy: {target_accuracy}%\n"
+                f"Performance Target: <{performance_target}s\n"
+                f"Tolerance: ±{tolerance_percentage}%",
+                title="Enterprise Benchmark Framework",
+            )
+        )
+
     async def run_benchmark(self, iterations: int = 5) -> BenchmarkSuite:
         """
         Run comprehensive benchmark across multiple iterations.
-
+
         Args:
             iterations: Number of benchmark iterations to run
-
+
         Returns:
             BenchmarkSuite with complete performance analysis
         """
-
+
         console.print(f"\n[bold cyan]Starting {iterations} benchmark iterations...[/bold cyan]")
-
+
         results: List[BenchmarkResult] = []
-
+
         with Progress() as progress:
             task = progress.add_task("[cyan]Running benchmark...", total=iterations)
-
+
             for i in range(iterations):
-                progress.console.print(f"\n[bold green]→ Iteration {i+1}/{iterations}[/bold green]")
-
+                progress.console.print(f"\n[bold green]→ Iteration {i + 1}/{iterations}[/bold green]")
+
                 # Run single benchmark iteration
                 result = await self._run_single_iteration(i + 1, progress)
                 results.append(result)
-
+
                 # Display iteration summary
                 status_color = "green" if result.accuracy >= self.target_accuracy else "red"
                 perf_color = "green" if result.execution_time <= self.performance_target else "red"
-
+
                 progress.console.print(
                     f" Accuracy: [{status_color}]{result.accuracy:.1f}%[/{status_color}] | "
                     f"Time: [{perf_color}]{result.execution_time:.1f}s[/{perf_color}] | "
                     f"Passed: {result.passed_validations}/{result.total_validations}"
                 )
-
+
                 progress.advance(task)
-
+
         # Calculate benchmark suite statistics
         return self._calculate_benchmark_statistics(results)
-
+
     async def _run_single_iteration(self, iteration: int, progress: Progress) -> BenchmarkResult:
         """Run single benchmark iteration."""
-
+
         start_time = time.time()
-
+
         # Initialize validator for this iteration
         validator = MCPValidator(
-            tolerance_percentage=self.tolerance_percentage,
-            performance_target_seconds=self.performance_target
+            tolerance_percentage=self.tolerance_percentage, performance_target_seconds=self.performance_target
         )
-
+
         # Run validation
         try:
             report = await validator.validate_all_operations()
-
+
             execution_time = time.time() - start_time
-
+
             return BenchmarkResult(
                 iteration=iteration,
                 accuracy=report.overall_accuracy,
@@ -156,13 +161,13 @@ class MCPBenchmarkRunner:
                 passed_validations=report.passed_validations,
                 total_validations=report.total_validations,
                 timestamp=datetime.now(),
-                details=report
+                details=report,
             )
-
+
         except Exception as e:
             execution_time = time.time() - start_time
             progress.console.print(f"[red]Iteration {iteration} failed: {e}[/red]")
-
+
             # Return failed iteration
             return BenchmarkResult(
                 iteration=iteration,
@@ -171,46 +176,48 @@ class MCPBenchmarkRunner:
                 passed_validations=0,
                 total_validations=5,  # Expected number of validations
                 timestamp=datetime.now(),
-                details=None
+                details=None,
             )
-
+
     def _calculate_benchmark_statistics(self, results: List[BenchmarkResult]) -> BenchmarkSuite:
         """Calculate comprehensive benchmark statistics."""
-
+
         if not results:
             raise ValueError("No benchmark results to analyze")
-
+
         # Basic statistics
         accuracies = [r.accuracy for r in results]
         times = [r.execution_time for r in results]
-
+
         avg_accuracy = statistics.mean(accuracies)
         avg_execution_time = statistics.mean(times)
         min_accuracy = min(accuracies)
         max_accuracy = max(accuracies)
         min_execution_time = min(times)
         max_execution_time = max(times)
-
+
         # Calculate standard deviations
         accuracy_std_dev = statistics.stdev(accuracies) if len(accuracies) > 1 else 0.0
         time_std_dev = statistics.stdev(times) if len(times) > 1 else 0.0
-
+
         # Success rate (meeting target accuracy)
         successful_iterations = len([r for r in results if r.accuracy >= self.target_accuracy])
         success_rate = (successful_iterations / len(results)) * 100
-
+
         # SRE reliability metrics
         availability = len([r for r in results if r.accuracy > 0]) / len(results) * 100
-
+
         # Reliability score (weighted accuracy + performance)
         accuracy_score = min(100, avg_accuracy / self.target_accuracy * 100)
-        performance_score = min(100, self.performance_target / avg_execution_time * 100) if avg_execution_time > 0 else 0
+        performance_score = (
+            min(100, self.performance_target / avg_execution_time * 100) if avg_execution_time > 0 else 0
+        )
         reliability_score = (accuracy_score * 0.7) + (performance_score * 0.3)  # 70% accuracy, 30% performance
-
+
         # Performance consistency (lower std dev = higher consistency)
         max_acceptable_std_dev = 5.0  # 5% standard deviation is acceptable
         performance_consistency = max(0, 100 - (accuracy_std_dev / max_acceptable_std_dev * 100))
-
+
         return BenchmarkSuite(
             target_accuracy=self.target_accuracy,
             performance_target=self.performance_target,
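
The reliability score in the hunk above weights accuracy at 70% and performance at 30%, with each sub-score capped at 100. A quick worked example with hypothetical numbers (illustrative only, not taken from any real run) shows how the cap on the performance term works:

# Hypothetical averages, for illustration only.
target_accuracy, performance_target = 99.5, 30.0
avg_accuracy, avg_execution_time = 99.0, 24.0

accuracy_score = min(100, avg_accuracy / target_accuracy * 100)               # ~99.5
performance_score = min(100, performance_target / avg_execution_time * 100)  # 125 -> capped at 100
reliability_score = accuracy_score * 0.7 + performance_score * 0.3           # ~99.6
print(round(reliability_score, 1))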
@@ -227,88 +234,78 @@ class MCPBenchmarkRunner:
             success_rate=success_rate,
             availability=availability,
             reliability_score=reliability_score,
-            performance_consistency=performance_consistency
+            performance_consistency=performance_consistency,
         )
-
+
     def display_benchmark_results(self, suite: BenchmarkSuite) -> None:
         """Display comprehensive benchmark results."""
-
+
         # Overall assessment
         overall_status = self._assess_benchmark_results(suite)
         status_color = "green" if overall_status == "PASSED" else "red" if overall_status == "FAILED" else "yellow"
-
-        console.print(Panel(
-            f"[bold {status_color}]Benchmark Status: {overall_status}[/bold {status_color}]\n"
-            f"Average Accuracy: {suite.avg_accuracy:.2f}% (Target: {suite.target_accuracy}%)\n"
-            f"Average Time: {suite.avg_execution_time:.1f}s (Target: <{suite.performance_target}s)\n"
-            f"Success Rate: {suite.success_rate:.1f}% | Reliability: {suite.reliability_score:.1f}%",
-            title="Benchmark Summary"
-        ))
-
+
+        console.print(
+            Panel(
+                f"[bold {status_color}]Benchmark Status: {overall_status}[/bold {status_color}]\n"
+                f"Average Accuracy: {suite.avg_accuracy:.2f}% (Target: {suite.target_accuracy}%)\n"
+                f"Average Time: {suite.avg_execution_time:.1f}s (Target: <{suite.performance_target}s)\n"
+                f"Success Rate: {suite.success_rate:.1f}% | Reliability: {suite.reliability_score:.1f}%",
+                title="Benchmark Summary",
+            )
+        )
+
         # Detailed statistics table
         stats_table = Table(title="Performance Statistics", box=box.ROUNDED)
         stats_table.add_column("Metric", style="cyan", no_wrap=True)
         stats_table.add_column("Value", justify="right", style="bold")
         stats_table.add_column("Status", style="bold")
-
+
         # Accuracy metrics
         stats_table.add_row(
-            "Average Accuracy", 
+            "Average Accuracy",
             f"{suite.avg_accuracy:.2f}%",
-            "✅ PASS" if suite.avg_accuracy >= suite.target_accuracy else "❌ FAIL"
-        )
-        stats_table.add_row(
-            "Accuracy Range",
-            f"{suite.min_accuracy:.1f}% - {suite.max_accuracy:.1f}%",
-            "ℹ️ INFO"
+            "✅ PASS" if suite.avg_accuracy >= suite.target_accuracy else "❌ FAIL",
         )
+        stats_table.add_row("Accuracy Range", f"{suite.min_accuracy:.1f}% - {suite.max_accuracy:.1f}%", "ℹ️ INFO")
         stats_table.add_row(
             "Accuracy Std Dev",
             f"{suite.accuracy_std_dev:.2f}%",
-            "✅ GOOD" if suite.accuracy_std_dev < 5.0 else "⚠️ HIGH"
+            "✅ GOOD" if suite.accuracy_std_dev < 5.0 else "⚠️ HIGH",
         )
-
+
         # Performance metrics
         stats_table.add_row(
             "Average Time",
             f"{suite.avg_execution_time:.1f}s",
-            "✅ PASS" if suite.avg_execution_time <= suite.performance_target else "❌ FAIL"
+            "✅ PASS" if suite.avg_execution_time <= suite.performance_target else "❌ FAIL",
         )
         stats_table.add_row(
-            "Time Range",
-            f"{suite.min_execution_time:.1f}s - {suite.max_execution_time:.1f}s",
-            "ℹ️ INFO"
+            "Time Range", f"{suite.min_execution_time:.1f}s - {suite.max_execution_time:.1f}s", "ℹ️ INFO"
         )
         stats_table.add_row(
-            "Time Std Dev",
-            f"{suite.time_std_dev:.1f}s",
-            "✅ GOOD" if suite.time_std_dev < 5.0 else "⚠️ HIGH"
+            "Time Std Dev", f"{suite.time_std_dev:.1f}s", "✅ GOOD" if suite.time_std_dev < 5.0 else "⚠️ HIGH"
         )
-
+
         # SRE metrics
         stats_table.add_row(
-            "Success Rate",
-            f"{suite.success_rate:.1f}%",
-            "✅ EXCELLENT" if suite.success_rate >= 80 else "❌ POOR"
+            "Success Rate", f"{suite.success_rate:.1f}%", "✅ EXCELLENT" if suite.success_rate >= 80 else "❌ POOR"
         )
         stats_table.add_row(
-            "Availability",
-            f"{suite.availability:.1f}%",
-            "✅ PASS" if suite.availability >= 99 else "❌ FAIL"
+            "Availability", f"{suite.availability:.1f}%", "✅ PASS" if suite.availability >= 99 else "❌ FAIL"
         )
         stats_table.add_row(
             "Reliability Score",
             f"{suite.reliability_score:.1f}%",
-            "✅ EXCELLENT" if suite.reliability_score >= 90 else "⚠️ NEEDS WORK"
+            "✅ EXCELLENT" if suite.reliability_score >= 90 else "⚠️ NEEDS WORK",
         )
         stats_table.add_row(
             "Consistency",
             f"{suite.performance_consistency:.1f}%",
-            "✅ STABLE" if suite.performance_consistency >= 80 else "⚠️ VARIABLE"
+            "✅ STABLE" if suite.performance_consistency >= 80 else "⚠️ VARIABLE",
         )
-
+
         console.print(stats_table)
-
+
         # Individual iteration results
         iterations_table = Table(title="Individual Iterations", box=box.MINIMAL)
         iterations_table.add_column("Iteration", justify="center")
@@ -316,78 +313,84 @@ class MCPBenchmarkRunner:
         iterations_table.add_column("Time (s)", justify="right")
         iterations_table.add_column("Passed/Total")
         iterations_table.add_column("Status", style="bold")
-
+
         for result in suite.results:
             status_color = "green" if result.accuracy >= suite.target_accuracy else "red"
-            status = "PASS" if result.accuracy >= suite.target_accuracy and result.execution_time <= suite.performance_target else "FAIL"
-
+            status = (
+                "PASS"
+                if result.accuracy >= suite.target_accuracy and result.execution_time <= suite.performance_target
+                else "FAIL"
+            )
+
             iterations_table.add_row(
                 str(result.iteration),
                 f"{result.accuracy:.1f}%",
                 f"{result.execution_time:.1f}",
                 f"{result.passed_validations}/{result.total_validations}",
-                f"[{status_color}]{status}[/{status_color}]"
+                f"[{status_color}]{status}[/{status_color}]",
             )
-
+
         console.print(iterations_table)
-
+
         # Recommendations
         recommendations = self._generate_benchmark_recommendations(suite)
         if recommendations:
-            console.print(Panel(
-                "\n".join(f"• {rec}" for rec in recommendations),
-                title="Recommendations",
-                border_style="blue"
-            ))
-
+            console.print(
+                Panel("\n".join(f"• {rec}" for rec in recommendations), title="Recommendations", border_style="blue")
+            )
+
         # Save benchmark report
         self._save_benchmark_report(suite)
-
+
     def _assess_benchmark_results(self, suite: BenchmarkSuite) -> str:
         """Assess overall benchmark results."""
-
+
         accuracy_pass = suite.avg_accuracy >= suite.target_accuracy
         performance_pass = suite.avg_execution_time <= suite.performance_target
         reliability_pass = suite.reliability_score >= 90
         consistency_pass = suite.accuracy_std_dev < 5.0
-
+
         if accuracy_pass and performance_pass and reliability_pass:
             return "PASSED"
         elif accuracy_pass and performance_pass:
             return "WARNING"
         else:
             return "FAILED"
-
+
     def _generate_benchmark_recommendations(self, suite: BenchmarkSuite) -> List[str]:
         """Generate actionable recommendations based on benchmark results."""
-
+
         recommendations = []
-
+
         # Accuracy recommendations
         if suite.avg_accuracy < suite.target_accuracy:
-            recommendations.append(f"🎯 Improve average accuracy from {suite.avg_accuracy:.1f}% to {suite.target_accuracy}%")
+            recommendations.append(
+                f"🎯 Improve average accuracy from {suite.avg_accuracy:.1f}% to {suite.target_accuracy}%"
+            )
             recommendations.append("🔍 Review MCP integration and AWS API permissions")
-
-        # Performance recommendations 
+
+        # Performance recommendations
         if suite.avg_execution_time > suite.performance_target:
-            recommendations.append(f"⚡ Optimize performance from {suite.avg_execution_time:.1f}s to <{suite.performance_target}s")
+            recommendations.append(
+                f"⚡ Optimize performance from {suite.avg_execution_time:.1f}s to <{suite.performance_target}s"
+            )
             recommendations.append("🚀 Consider parallel validation and caching strategies")
-
+
         # Consistency recommendations
         if suite.accuracy_std_dev > 5.0:
             recommendations.append(f"📊 Improve consistency - accuracy std dev {suite.accuracy_std_dev:.1f}% is high")
             recommendations.append("🔧 Investigate sources of validation variance")
-
+
         # Reliability recommendations
         if suite.reliability_score < 90:
             recommendations.append(f"🛠️ Enhance reliability score from {suite.reliability_score:.1f}% to >90%")
             recommendations.append("📈 Focus on both accuracy and performance improvements")
-
+
         # Success rate recommendations
         if suite.success_rate < 80:
             recommendations.append(f"✅ Improve success rate from {suite.success_rate:.1f}% to >80%")
             recommendations.append("🎯 Address systematic issues causing validation failures")
-
+
         # Production readiness
         overall_status = self._assess_benchmark_results(suite)
         if overall_status == "PASSED":
@@ -396,20 +399,20 @@ async def main():
             recommendations.append("⚠️ Benchmark WARNING - Address consistency issues before production")
         else:
             recommendations.append("❌ Benchmark FAILED - Significant improvements needed before production")
-
+
         return recommendations
-
+
     def _save_benchmark_report(self, suite: BenchmarkSuite) -> None:
         """Save benchmark report to artifacts directory."""
-
+
         from pathlib import Path
-
+
         artifacts_dir = Path("./artifacts/benchmark")
         artifacts_dir.mkdir(parents=True, exist_ok=True)
-
+
         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
         report_file = artifacts_dir / f"mcp_benchmark_{timestamp}.json"
-
+
         # Convert to serializable format
         report_data = {
             "benchmark_suite": {
@@ -427,7 +430,7 @@ class MCPBenchmarkRunner:
                 "success_rate": suite.success_rate,
                 "availability": suite.availability,
                 "reliability_score": suite.reliability_score,
-                "performance_consistency": suite.performance_consistency
+                "performance_consistency": suite.performance_consistency,
             },
             "results": [
                 {
@@ -436,41 +439,42 @@ class MCPBenchmarkRunner:
                     "execution_time": r.execution_time,
                     "passed_validations": r.passed_validations,
                     "total_validations": r.total_validations,
-                    "timestamp": r.timestamp.isoformat()
+                    "timestamp": r.timestamp.isoformat(),
                 }
                 for r in suite.results
             ],
             "assessment": self._assess_benchmark_results(suite),
-            "recommendations": self._generate_benchmark_recommendations(suite)
+            "recommendations": self._generate_benchmark_recommendations(suite),
         }
-
-        with open(report_file, 'w') as f:
+
+        with open(report_file, "w") as f:
             json.dump(report_data, f, indent=2)
-
+
         console.print(f"[green]Benchmark report saved:[/green] {report_file}")
 
+
 # CLI entry point
 async def main():
     """CLI entry point for benchmark runner."""
     import argparse
-
+
     parser = argparse.ArgumentParser(description="MCP Validation Benchmark Suite")
-    parser.add_argument('--iterations', type=int, default=5, help='Number of benchmark iterations')
-    parser.add_argument('--target-accuracy', type=float, default=99.5, help='Target accuracy percentage')
-    parser.add_argument('--performance-target', type=float, default=30.0, help='Performance target in seconds')
-    parser.add_argument('--tolerance', type=float, default=5.0, help='Tolerance percentage')
-
+    parser.add_argument("--iterations", type=int, default=5, help="Number of benchmark iterations")
+    parser.add_argument("--target-accuracy", type=float, default=99.5, help="Target accuracy percentage")
+    parser.add_argument("--performance-target", type=float, default=30.0, help="Performance target in seconds")
+    parser.add_argument("--tolerance", type=float, default=5.0, help="Tolerance percentage")
+
     args = parser.parse_args()
-
+
     runner = MCPBenchmarkRunner(
         target_accuracy=args.target_accuracy,
         performance_target=args.performance_target,
-        tolerance_percentage=args.tolerance
+        tolerance_percentage=args.tolerance,
     )
-
+
     suite = await runner.run_benchmark(args.iterations)
     runner.display_benchmark_results(suite)
-
+
     # Exit with appropriate code
     overall_status = runner._assess_benchmark_results(suite)
    if overall_status == "PASSED":
@@ -480,5 +484,6 @@ async def main():
     else:
         exit(2)
 
-if __name__ == '__main__':
-    asyncio.run(main())
+
+if __name__ == "__main__":
+    asyncio.run(main())
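
Across these benchmark.py hunks the changes are formatting and import ordering only: the argparse flags (--iterations, --target-accuracy, --performance-target, --tolerance) and the MCPBenchmarkRunner API keep their names. A minimal programmatic sketch, assuming the module imports as runbooks.validation.benchmark (its path in this wheel) and using only the calls shown above:

import asyncio

from runbooks.validation.benchmark import MCPBenchmarkRunner


async def demo() -> None:
    # Constructor keywords and method names match those visible in the diff above.
    runner = MCPBenchmarkRunner(target_accuracy=99.5, performance_target=30.0, tolerance_percentage=5.0)
    suite = await runner.run_benchmark(iterations=3)
    # Prints the Rich tables and writes ./artifacts/benchmark/mcp_benchmark_<timestamp>.json
    runner.display_benchmark_results(suite)


asyncio.run(demo())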