kailash-0.9.15-py3-none-any.whl → kailash-0.9.17-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. kailash/__init__.py +4 -3
  2. kailash/middleware/database/base_models.py +7 -1
  3. kailash/migration/__init__.py +30 -0
  4. kailash/migration/cli.py +340 -0
  5. kailash/migration/compatibility_checker.py +662 -0
  6. kailash/migration/configuration_validator.py +837 -0
  7. kailash/migration/documentation_generator.py +1828 -0
  8. kailash/migration/examples/__init__.py +5 -0
  9. kailash/migration/examples/complete_migration_example.py +692 -0
  10. kailash/migration/migration_assistant.py +715 -0
  11. kailash/migration/performance_comparator.py +760 -0
  12. kailash/migration/regression_detector.py +1141 -0
  13. kailash/migration/tests/__init__.py +6 -0
  14. kailash/migration/tests/test_compatibility_checker.py +403 -0
  15. kailash/migration/tests/test_integration.py +463 -0
  16. kailash/migration/tests/test_migration_assistant.py +397 -0
  17. kailash/migration/tests/test_performance_comparator.py +433 -0
  18. kailash/monitoring/__init__.py +29 -2
  19. kailash/monitoring/asyncsql_metrics.py +275 -0
  20. kailash/nodes/data/async_sql.py +1828 -33
  21. kailash/runtime/local.py +1255 -8
  22. kailash/runtime/monitoring/__init__.py +1 -0
  23. kailash/runtime/monitoring/runtime_monitor.py +780 -0
  24. kailash/runtime/resource_manager.py +3033 -0
  25. kailash/sdk_exceptions.py +21 -0
  26. kailash/workflow/cyclic_runner.py +18 -2
  27. {kailash-0.9.15.dist-info → kailash-0.9.17.dist-info}/METADATA +1 -1
  28. {kailash-0.9.15.dist-info → kailash-0.9.17.dist-info}/RECORD +33 -14
  29. {kailash-0.9.15.dist-info → kailash-0.9.17.dist-info}/WHEEL +0 -0
  30. {kailash-0.9.15.dist-info → kailash-0.9.17.dist-info}/entry_points.txt +0 -0
  31. {kailash-0.9.15.dist-info → kailash-0.9.17.dist-info}/licenses/LICENSE +0 -0
  32. {kailash-0.9.15.dist-info → kailash-0.9.17.dist-info}/licenses/NOTICE +0 -0
  33. {kailash-0.9.15.dist-info → kailash-0.9.17.dist-info}/top_level.txt +0 -0
kailash/migration/regression_detector.py
@@ -0,0 +1,1141 @@
+"""Regression detection system for post-migration validation.
+
+This module provides comprehensive regression detection capabilities to identify
+issues that may have been introduced during LocalRuntime migration, including
+performance regressions, functional regressions, and configuration issues.
+"""
+
+import hashlib
+import json
+import pickle
+import statistics
+import threading
+import time
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from dataclasses import dataclass, field
+from datetime import datetime, timezone
+from enum import Enum
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Set, Tuple, Union
+
+from kailash.runtime.local import LocalRuntime
+from kailash.workflow import Workflow
+from kailash.workflow.builder import WorkflowBuilder
+
+
+class RegressionType(str, Enum):
+    """Types of regressions that can be detected."""
+
+    PERFORMANCE = "performance"
+    FUNCTIONAL = "functional"
+    CONFIGURATION = "configuration"
+    SECURITY = "security"
+    RESOURCE = "resource"
+    COMPATIBILITY = "compatibility"
+
+
+class RegressionSeverity(str, Enum):
+    """Severity levels for regression issues."""
+
+    CRITICAL = "critical"  # System unusable
+    HIGH = "high"  # Major functionality affected
+    MEDIUM = "medium"  # Minor functionality affected
+    LOW = "low"  # Cosmetic or edge case issues
+
+
+@dataclass
+class RegressionIssue:
+    """Represents a detected regression."""
+
+    regression_type: RegressionType
+    severity: RegressionSeverity
+    test_name: str
+    description: str
+    expected_value: Any
+    actual_value: Any
+    threshold: float
+    deviation_percentage: float
+    metadata: Dict[str, Any] = field(default_factory=dict)
+    detected_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
+
+
+@dataclass
+class BaselineSnapshot:
+    """Baseline snapshot for regression comparison."""
+
+    test_name: str
+    workflow_hash: str
+    configuration: Dict[str, Any]
+    results: Dict[str, Any]
+    performance_metrics: Dict[str, float]
+    resource_usage: Dict[str, float]
+    timestamp: datetime
+    metadata: Dict[str, Any] = field(default_factory=dict)
+
+
+@dataclass
+class RegressionReport:
+    """Comprehensive regression detection report."""
+
+    total_tests: int
+    passed_tests: int
+    failed_tests: int
+    regression_issues: List[RegressionIssue] = field(default_factory=list)
+    baseline_missing: List[str] = field(default_factory=list)
+    test_summary: Dict[str, Dict] = field(default_factory=dict)
+    overall_status: str = "unknown"
+    generated_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
+
+
+class RegressionDetector:
+    """Comprehensive regression detection system."""
+
+    def __init__(
+        self,
+        baseline_path: Union[str, Path] = "migration_baseline.json",
+        performance_threshold: float = 10.0,  # % degradation threshold
+        resource_threshold: float = 20.0,  # % resource increase threshold
+        parallel_tests: bool = True,
+        max_workers: int = 4,
+    ):
+        """Initialize the regression detector.
+
+        Args:
+            baseline_path: Path to store/load baseline data
+            performance_threshold: Performance regression threshold (%)
+            resource_threshold: Resource usage regression threshold (%)
+            parallel_tests: Whether to run tests in parallel
+            max_workers: Maximum number of parallel workers
+        """
+        self.baseline_path = Path(baseline_path)
+        self.performance_threshold = performance_threshold
+        self.resource_threshold = resource_threshold
+        self.parallel_tests = parallel_tests
+        self.max_workers = max_workers
+
+        # Baseline data storage
+        self.baselines: Dict[str, BaselineSnapshot] = {}
+        self._load_baselines()
+
+        # Test workflows for regression testing
+        self.test_workflows = self._create_test_workflows()
+
+        # Performance tracking
+        self.performance_history: Dict[str, List[float]] = {}
+
+        # Regression thresholds by type
+        self.thresholds = {
+            RegressionType.PERFORMANCE: performance_threshold,
+            RegressionType.RESOURCE: resource_threshold,
+            RegressionType.FUNCTIONAL: 0.0,  # Any functional change is a regression
+            RegressionType.CONFIGURATION: 0.0,
+            RegressionType.SECURITY: 0.0,
+            RegressionType.COMPATIBILITY: 0.0,
+        }
+
+    def create_baseline(
+        self,
+        config: Dict[str, Any],
+        custom_workflows: Optional[List[Tuple[str, Workflow]]] = None,
+    ) -> Dict[str, BaselineSnapshot]:
+        """Create baseline snapshots for regression detection.
+
+        Args:
+            config: LocalRuntime configuration to baseline
+            custom_workflows: Optional custom workflows to include
+
+        Returns:
+            Dictionary of baseline snapshots
+        """
+        workflows = custom_workflows or self.test_workflows
+        baselines = {}
+
+        print(f"Creating baseline with {len(workflows)} test workflows...")
+
+        for test_name, workflow in workflows:
+            print(f" Creating baseline for: {test_name}")
+            baseline = self._create_baseline_snapshot(test_name, workflow, config)
+            if baseline:
+                baselines[test_name] = baseline
+                self.baselines[test_name] = baseline
+
+        # Save baselines to disk
+        self._save_baselines()
+
+        return baselines
+
+    def detect_regressions(
+        self,
+        config: Dict[str, Any],
+        custom_workflows: Optional[List[Tuple[str, Workflow]]] = None,
+    ) -> RegressionReport:
+        """Detect regressions by comparing against baseline.
+
+        Args:
+            config: Current LocalRuntime configuration
+            custom_workflows: Optional custom workflows to test
+
+        Returns:
+            Comprehensive regression report
+        """
+        workflows = custom_workflows or self.test_workflows
+
+        report = RegressionReport(
+            total_tests=len(workflows), passed_tests=0, failed_tests=0
+        )
+
+        # Run tests and detect regressions
+        if self.parallel_tests:
+            self._run_parallel_regression_tests(workflows, config, report)
+        else:
+            self._run_sequential_regression_tests(workflows, config, report)
+
+        # Analyze overall status
+        report.overall_status = self._determine_overall_status(report)
+
+        return report
+
+    def _create_test_workflows(self) -> List[Tuple[str, Workflow]]:
+        """Create standard test workflows for regression testing."""
+        workflows = []
+
+        # Simple execution test
+        simple_builder = WorkflowBuilder()
+        simple_builder.add_node(
+            "PythonCodeNode",
+            "simple",
+            {"code": "result = 'hello_world'", "output_key": "message"},
+        )
+        workflows.append(("simple_execution", simple_builder.build()))
+
+        # Performance test
+        perf_builder = WorkflowBuilder()
+        perf_builder.add_node(
+            "PythonCodeNode",
+            "performance",
+            {
+                "code": """
+import time
+start = time.time()
+# Simulate work
+result = sum(i*i for i in range(10000))
+duration = time.time() - start
+""",
+                "output_key": "calculation_result",
+            },
+        )
+        workflows.append(("performance_test", perf_builder.build()))
+
+        # Memory test
+        memory_builder = WorkflowBuilder()
+        memory_builder.add_node(
+            "PythonCodeNode",
+            "memory",
+            {
+                "code": """
+import gc
+# Create and cleanup large object
+large_data = [list(range(1000)) for _ in range(100)]
+result = len(large_data)
+del large_data
+gc.collect()
+""",
+                "output_key": "memory_result",
+            },
+        )
+        workflows.append(("memory_test", memory_builder.build()))
+
+        # Error handling test
+        error_builder = WorkflowBuilder()
+        error_builder.add_node(
+            "PythonCodeNode",
+            "error_handling",
+            {
+                "code": """
+try:
+    result = 1 / 0
+except ZeroDivisionError:
+    result = "error_handled_correctly"
+""",
+                "output_key": "error_result",
+            },
+        )
+        workflows.append(("error_handling_test", error_builder.build()))
+
+        # Multi-node workflow test
+        multi_builder = WorkflowBuilder()
+        multi_builder.add_node(
+            "PythonCodeNode",
+            "step1",
+            {"code": "result = [1, 2, 3, 4, 5]", "output_key": "numbers"},
+        )
+        multi_builder.add_node(
+            "PythonCodeNode",
+            "step2",
+            {
+                "code": "result = [x * 2 for x in numbers]",
+                "input_mapping": {"numbers": "step1.numbers"},
+                "output_key": "doubled",
+            },
+        )
+        multi_builder.add_node(
+            "PythonCodeNode",
+            "step3",
+            {
+                "code": "result = sum(doubled)",
+                "input_mapping": {"doubled": "step2.doubled"},
+                "output_key": "sum_result",
+            },
+        )
+        workflows.append(("multi_node_test", multi_builder.build()))
+
+        # Configuration sensitivity test
+        config_builder = WorkflowBuilder()
+        config_builder.add_node(
+            "PythonCodeNode",
+            "config_test",
+            {
+                "code": """
+import os
+import threading
+# Test configuration-sensitive operations
+result = {
+    'thread_id': threading.get_ident(),
+    'process_id': os.getpid(),
+    'environment_ready': True
+}
+""",
+                "output_key": "config_result",
+            },
+        )
+        workflows.append(("configuration_test", config_builder.build()))
+
+        return workflows
+
+    def _create_baseline_snapshot(
+        self, test_name: str, workflow: Workflow, config: Dict[str, Any]
+    ) -> Optional[BaselineSnapshot]:
+        """Create a baseline snapshot for a single test."""
+        try:
+            # Create workflow hash for change detection
+            workflow_hash = self._hash_workflow(workflow)
+
+            # Run test multiple times for stable metrics
+            runtime = LocalRuntime(**config)
+            execution_times = []
+            memory_usages = []
+            results_history = []
+
+            for _ in range(3):  # 3 runs for stability
+                import psutil
+
+                process = psutil.Process()
+
+                # Measure before execution
+                memory_before = process.memory_info().rss / 1024 / 1024  # MB
+
+                # Execute workflow
+                start_time = time.perf_counter()
+                results, run_id = runtime.execute(workflow)
+                end_time = time.perf_counter()
+
+                # Measure after execution
+                memory_after = process.memory_info().rss / 1024 / 1024  # MB
+
+                execution_times.append((end_time - start_time) * 1000)  # ms
+                memory_usages.append(memory_after - memory_before)
+                results_history.append(results)
+
+            # Calculate stable metrics
+            avg_execution_time = statistics.mean(execution_times)
+            avg_memory_usage = statistics.mean(memory_usages)
+
+            # Use first result as baseline (assuming deterministic workflows)
+            baseline_results = results_history[0]
+
+            # Create snapshot
+            snapshot = BaselineSnapshot(
+                test_name=test_name,
+                workflow_hash=workflow_hash,
+                configuration=config.copy(),
+                results=baseline_results,
+                performance_metrics={
+                    "execution_time_ms": avg_execution_time,
+                    "execution_time_stddev": (
+                        statistics.stdev(execution_times)
+                        if len(execution_times) > 1
+                        else 0.0
+                    ),
+                },
+                resource_usage={
+                    "memory_usage_mb": avg_memory_usage,
+                    "memory_stddev": (
+                        statistics.stdev(memory_usages)
+                        if len(memory_usages) > 1
+                        else 0.0
+                    ),
+                },
+                timestamp=datetime.now(timezone.utc),
+                metadata={
+                    "runs": len(execution_times),
+                    "config_hash": self._hash_config(config),
+                },
+            )
+
+            return snapshot
+
+        except Exception as e:
+            print(f"Failed to create baseline for {test_name}: {str(e)}")
+            return None
+
+    def _run_parallel_regression_tests(
+        self,
+        workflows: List[Tuple[str, Workflow]],
+        config: Dict[str, Any],
+        report: RegressionReport,
+    ) -> None:
+        """Run regression tests in parallel."""
+        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
+            # Submit all test jobs
+            future_to_test = {
+                executor.submit(
+                    self._run_single_regression_test, test_name, workflow, config
+                ): test_name
+                for test_name, workflow in workflows
+            }
+
+            # Collect results
+            for future in as_completed(future_to_test):
+                test_name = future_to_test[future]
+                try:
+                    test_result = future.result()
+                    self._process_test_result(test_name, test_result, report)
+                except Exception as e:
+                    # Add test failure
+                    report.failed_tests += 1
+                    report.test_summary[test_name] = {
+                        "status": "error",
+                        "error": str(e),
+                    }
+
+    def _run_sequential_regression_tests(
+        self,
+        workflows: List[Tuple[str, Workflow]],
+        config: Dict[str, Any],
+        report: RegressionReport,
+    ) -> None:
+        """Run regression tests sequentially."""
+        for test_name, workflow in workflows:
+            try:
+                test_result = self._run_single_regression_test(
+                    test_name, workflow, config
+                )
+                self._process_test_result(test_name, test_result, report)
+            except Exception as e:
+                report.failed_tests += 1
+                report.test_summary[test_name] = {"status": "error", "error": str(e)}
+
+    def _run_single_regression_test(
+        self, test_name: str, workflow: Workflow, config: Dict[str, Any]
+    ) -> Dict[str, Any]:
+        """Run a single regression test and return results."""
+        # Get baseline for comparison
+        baseline = self.baselines.get(test_name)
+        if not baseline:
+            return {
+                "status": "no_baseline",
+                "message": f"No baseline found for test {test_name}",
+            }
+
+        # Check workflow consistency
+        current_hash = self._hash_workflow(workflow)
+        if current_hash != baseline.workflow_hash:
+            return {
+                "status": "workflow_changed",
+                "message": "Workflow has changed since baseline creation",
+                "baseline_hash": baseline.workflow_hash,
+                "current_hash": current_hash,
+            }
+
+        # Run test
+        runtime = LocalRuntime(**config)
+
+        # Performance measurement
+        import psutil
+
+        process = psutil.Process()
+
+        # Multiple runs for stability
+        execution_times = []
+        memory_usages = []
+        results_history = []
+
+        for _ in range(3):
+            memory_before = process.memory_info().rss / 1024 / 1024
+
+            start_time = time.perf_counter()
+            results, run_id = runtime.execute(workflow)
+            end_time = time.perf_counter()
+
+            memory_after = process.memory_info().rss / 1024 / 1024
+
+            execution_times.append((end_time - start_time) * 1000)
+            memory_usages.append(memory_after - memory_before)
+            results_history.append(results)
+
+        # Calculate current metrics
+        current_metrics = {
+            "execution_time_ms": statistics.mean(execution_times),
+            "memory_usage_mb": statistics.mean(memory_usages),
+            "results": results_history[0],  # Use first result for consistency
+        }
+
+        return {
+            "status": "completed",
+            "baseline": baseline,
+            "current": current_metrics,
+            "runs": len(execution_times),
+        }
+
+    def _process_test_result(
+        self, test_name: str, test_result: Dict[str, Any], report: RegressionReport
+    ) -> None:
+        """Process a single test result and detect regressions."""
+        if test_result["status"] == "no_baseline":
+            report.baseline_missing.append(test_name)
+            report.test_summary[test_name] = test_result
+            return
+
+        if test_result["status"] != "completed":
+            report.failed_tests += 1
+            report.test_summary[test_name] = test_result
+            return
+
+        baseline = test_result["baseline"]
+        current = test_result["current"]
+
+        # Detect performance regressions
+        performance_issues = self._detect_performance_regressions(
+            test_name, baseline, current
+        )
+        report.regression_issues.extend(performance_issues)
+
+        # Detect functional regressions
+        functional_issues = self._detect_functional_regressions(
+            test_name, baseline, current
+        )
+        report.regression_issues.extend(functional_issues)
+
+        # Detect resource regressions
+        resource_issues = self._detect_resource_regressions(
+            test_name, baseline, current
+        )
+        report.regression_issues.extend(resource_issues)
+
+        # Determine test status
+        has_critical = any(
+            issue.severity == RegressionSeverity.CRITICAL
+            for issue in performance_issues + functional_issues + resource_issues
+        )
+        has_high = any(
+            issue.severity == RegressionSeverity.HIGH
+            for issue in performance_issues + functional_issues + resource_issues
+        )
+
+        if has_critical:
+            test_status = "critical_regression"
+            report.failed_tests += 1
+        elif has_high:
+            test_status = "high_regression"
+            report.failed_tests += 1
+        elif performance_issues or functional_issues or resource_issues:
+            test_status = "minor_regression"
+            report.passed_tests += 1
+        else:
+            test_status = "passed"
+            report.passed_tests += 1
+
+        report.test_summary[test_name] = {
+            "status": test_status,
+            "issues_found": len(
+                performance_issues + functional_issues + resource_issues
+            ),
+            "execution_time_change": self._calculate_percentage_change(
+                baseline.performance_metrics.get("execution_time_ms", 0),
+                current["execution_time_ms"],
+            ),
+            "memory_change": self._calculate_percentage_change(
+                baseline.resource_usage.get("memory_usage_mb", 0),
+                current["memory_usage_mb"],
+            ),
+        }
+
+    def _detect_performance_regressions(
+        self, test_name: str, baseline: BaselineSnapshot, current: Dict[str, Any]
+    ) -> List[RegressionIssue]:
+        """Detect performance regressions."""
+        issues = []
+
+        # Execution time regression
+        baseline_time = baseline.performance_metrics.get("execution_time_ms", 0)
+        current_time = current["execution_time_ms"]
+        time_change = self._calculate_percentage_change(baseline_time, current_time)
+
+        if time_change > self.performance_threshold:
+            severity = self._determine_severity(time_change, self.performance_threshold)
+            issues.append(
+                RegressionIssue(
+                    regression_type=RegressionType.PERFORMANCE,
+                    severity=severity,
+                    test_name=test_name,
+                    description=f"Execution time increased by {time_change:.1f}%",
+                    expected_value=baseline_time,
+                    actual_value=current_time,
+                    threshold=self.performance_threshold,
+                    deviation_percentage=time_change,
+                    metadata={"metric": "execution_time_ms"},
+                )
+            )
+
+        return issues
+
+    def _detect_functional_regressions(
+        self, test_name: str, baseline: BaselineSnapshot, current: Dict[str, Any]
+    ) -> List[RegressionIssue]:
+        """Detect functional regressions."""
+        issues = []
+
+        baseline_results = baseline.results
+        current_results = current["results"]
+
+        # Deep comparison of results
+        differences = self._deep_compare_results(baseline_results, current_results)
+
+        for diff in differences:
+            # Any functional change is considered a regression
+            severity = (
+                RegressionSeverity.HIGH
+                if diff["critical"]
+                else RegressionSeverity.MEDIUM
+            )
+
+            issues.append(
+                RegressionIssue(
+                    regression_type=RegressionType.FUNCTIONAL,
+                    severity=severity,
+                    test_name=test_name,
+                    description=f"Functional change detected: {diff['description']}",
+                    expected_value=diff["expected"],
+                    actual_value=diff["actual"],
+                    threshold=0.0,
+                    deviation_percentage=100.0,  # Functional changes are 100% different
+                    metadata={"path": diff["path"], "change_type": diff["type"]},
+                )
+            )
+
+        return issues
+
+    def _detect_resource_regressions(
+        self, test_name: str, baseline: BaselineSnapshot, current: Dict[str, Any]
+    ) -> List[RegressionIssue]:
+        """Detect resource usage regressions."""
+        issues = []
+
+        # Memory usage regression
+        baseline_memory = baseline.resource_usage.get("memory_usage_mb", 0)
+        current_memory = current["memory_usage_mb"]
+        memory_change = self._calculate_percentage_change(
+            baseline_memory, current_memory
+        )
+
+        if memory_change > self.resource_threshold:
+            severity = self._determine_severity(memory_change, self.resource_threshold)
+            issues.append(
+                RegressionIssue(
+                    regression_type=RegressionType.RESOURCE,
+                    severity=severity,
+                    test_name=test_name,
+                    description=f"Memory usage increased by {memory_change:.1f}%",
+                    expected_value=baseline_memory,
+                    actual_value=current_memory,
+                    threshold=self.resource_threshold,
+                    deviation_percentage=memory_change,
+                    metadata={"metric": "memory_usage_mb"},
+                )
+            )
+
+        return issues
+
+    def _calculate_percentage_change(self, baseline: float, current: float) -> float:
+        """Calculate percentage change from baseline."""
+        if baseline == 0:
+            return 0.0 if current == 0 else float("inf")
+        return ((current - baseline) / baseline) * 100
+
+    def _determine_severity(
+        self, change_percentage: float, threshold: float
+    ) -> RegressionSeverity:
+        """Determine severity based on change percentage."""
+        if change_percentage >= threshold * 4:
+            return RegressionSeverity.CRITICAL
+        elif change_percentage >= threshold * 2:
+            return RegressionSeverity.HIGH
+        elif change_percentage >= threshold:
+            return RegressionSeverity.MEDIUM
+        else:
+            return RegressionSeverity.LOW
+
+    def _deep_compare_results(
+        self, baseline: Dict, current: Dict
+    ) -> List[Dict[str, Any]]:
+        """Deep comparison of result dictionaries."""
+        differences = []
+
+        def compare_recursive(base_obj, curr_obj, path=""):
+            if type(base_obj) != type(curr_obj):
+                differences.append(
+                    {
+                        "path": path,
+                        "type": "type_change",
+                        "description": f"Type changed from {type(base_obj).__name__} to {type(curr_obj).__name__}",
+                        "expected": type(base_obj).__name__,
+                        "actual": type(curr_obj).__name__,
+                        "critical": True,
+                    }
+                )
+                return
+
+            if isinstance(base_obj, dict):
+                # Check for missing keys
+                base_keys = set(base_obj.keys())
+                curr_keys = set(curr_obj.keys())
+
+                missing_keys = base_keys - curr_keys
+                new_keys = curr_keys - base_keys
+
+                for key in missing_keys:
+                    differences.append(
+                        {
+                            "path": f"{path}.{key}" if path else key,
+                            "type": "missing_key",
+                            "description": f'Key "{key}" missing from results',
+                            "expected": base_obj[key],
+                            "actual": None,
+                            "critical": True,
+                        }
+                    )
+
+                for key in new_keys:
+                    differences.append(
+                        {
+                            "path": f"{path}.{key}" if path else key,
+                            "type": "new_key",
+                            "description": f'Unexpected key "{key}" in results',
+                            "expected": None,
+                            "actual": curr_obj[key],
+                            "critical": False,
+                        }
+                    )
+
+                # Compare common keys
+                for key in base_keys & curr_keys:
+                    new_path = f"{path}.{key}" if path else key
+                    compare_recursive(base_obj[key], curr_obj[key], new_path)
+
+            elif isinstance(base_obj, list):
+                if len(base_obj) != len(curr_obj):
+                    differences.append(
+                        {
+                            "path": path,
+                            "type": "length_change",
+                            "description": f"List length changed from {len(base_obj)} to {len(curr_obj)}",
+                            "expected": len(base_obj),
+                            "actual": len(curr_obj),
+                            "critical": True,
+                        }
+                    )
+                    return
+
+                for i, (base_item, curr_item) in enumerate(zip(base_obj, curr_obj)):
+                    compare_recursive(base_item, curr_item, f"{path}[{i}]")
+
+            else:
+                # Compare primitive values
+                if base_obj != curr_obj:
+                    differences.append(
+                        {
+                            "path": path,
+                            "type": "value_change",
+                            "description": f'Value changed from "{base_obj}" to "{curr_obj}"',
+                            "expected": base_obj,
+                            "actual": curr_obj,
+                            "critical": False,
+                        }
+                    )
+
+        compare_recursive(baseline, current)
+        return differences
+
+    def _determine_overall_status(self, report: RegressionReport) -> str:
+        """Determine overall status from report."""
+        critical_issues = len(
+            [
+                i
+                for i in report.regression_issues
+                if i.severity == RegressionSeverity.CRITICAL
+            ]
+        )
+        high_issues = len(
+            [
+                i
+                for i in report.regression_issues
+                if i.severity == RegressionSeverity.HIGH
+            ]
+        )
+
+        if critical_issues > 0:
+            return "critical_regressions"
+        elif high_issues > 0:
+            return "high_regressions"
+        elif len(report.regression_issues) > 0:
+            return "minor_regressions"
+        elif report.failed_tests > 0:
+            return "test_failures"
+        elif len(report.baseline_missing) > 0:
+            return "missing_baselines"
+        else:
+            return "all_passed"
+
+    def _hash_workflow(self, workflow: Workflow) -> str:
+        """Create a hash of the workflow for change detection."""
+        # Convert workflow to a deterministic string representation
+        workflow_str = json.dumps(workflow.to_dict(), sort_keys=True)
+        return hashlib.sha256(workflow_str.encode()).hexdigest()[:16]
+
+    def _hash_config(self, config: Dict[str, Any]) -> str:
+        """Create a hash of the configuration."""
+        config_str = json.dumps(config, sort_keys=True, default=str)
+        return hashlib.sha256(config_str.encode()).hexdigest()[:16]
+
+    def _save_baselines(self) -> None:
+        """Save baselines to disk."""
+        baseline_data = {}
+        for test_name, baseline in self.baselines.items():
+            baseline_data[test_name] = {
+                "test_name": baseline.test_name,
+                "workflow_hash": baseline.workflow_hash,
+                "configuration": baseline.configuration,
+                "results": baseline.results,
+                "performance_metrics": baseline.performance_metrics,
+                "resource_usage": baseline.resource_usage,
+                "timestamp": baseline.timestamp.isoformat(),
+                "metadata": baseline.metadata,
+            }
+
+        with open(self.baseline_path, "w") as f:
+            json.dump(baseline_data, f, indent=2, default=str)
+
+    def _load_baselines(self) -> None:
+        """Load baselines from disk."""
+        if not self.baseline_path.exists():
+            return
+
+        try:
+            with open(self.baseline_path, "r") as f:
+                baseline_data = json.load(f)
+
+            for test_name, data in baseline_data.items():
+                baseline = BaselineSnapshot(
+                    test_name=data["test_name"],
+                    workflow_hash=data["workflow_hash"],
+                    configuration=data["configuration"],
+                    results=data["results"],
+                    performance_metrics=data["performance_metrics"],
+                    resource_usage=data["resource_usage"],
+                    timestamp=datetime.fromisoformat(data["timestamp"]),
+                    metadata=data["metadata"],
+                )
+                self.baselines[test_name] = baseline
+
+        except Exception as e:
+            print(f"Failed to load baselines: {str(e)}")
+
+    def generate_regression_report(
+        self, report: RegressionReport, format: str = "text"
+    ) -> str:
+        """Generate a comprehensive regression report.
+
+        Args:
+            report: Regression detection report
+            format: Report format ("text", "json", "markdown")
+
+        Returns:
+            Formatted regression report
+        """
+        if format == "json":
+            return self._generate_json_report(report)
+        elif format == "markdown":
+            return self._generate_markdown_report(report)
+        else:
+            return self._generate_text_report(report)
+
+    def _generate_text_report(self, report: RegressionReport) -> str:
+        """Generate text format regression report."""
+        lines = []
+        lines.append("=" * 60)
+        lines.append("LocalRuntime Regression Detection Report")
+        lines.append("=" * 60)
+        lines.append("")
+
+        # Executive summary
+        lines.append("EXECUTIVE SUMMARY")
+        lines.append("-" * 20)
+        lines.append(
+            f"Overall Status: {report.overall_status.upper().replace('_', ' ')}"
+        )
+        lines.append(f"Total Tests: {report.total_tests}")
+        lines.append(f"Passed: {report.passed_tests}")
+        lines.append(f"Failed: {report.failed_tests}")
+        lines.append(f"Missing Baselines: {len(report.baseline_missing)}")
+        lines.append(f"Regression Issues: {len(report.regression_issues)}")
+        lines.append("")
+
+        # Regression issues by severity
+        if report.regression_issues:
+            lines.append("REGRESSION ISSUES")
+            lines.append("-" * 20)
+
+            for severity in RegressionSeverity:
+                severity_issues = [
+                    i for i in report.regression_issues if i.severity == severity
+                ]
+                if severity_issues:
+                    lines.append(
+                        f"\n{severity.value.upper()} ISSUES ({len(severity_issues)}):"
+                    )
+                    for issue in severity_issues:
+                        lines.append(f" • {issue.test_name}: {issue.description}")
+                        lines.append(f" Expected: {issue.expected_value}")
+                        lines.append(f" Actual: {issue.actual_value}")
+                        lines.append(
+                            f" Deviation: {issue.deviation_percentage:.1f}%"
+                        )
+            lines.append("")
+
+        # Test summary
+        lines.append("TEST SUMMARY")
+        lines.append("-" * 15)
+        for test_name, summary in report.test_summary.items():
+            status_icon = {
+                "passed": "✅",
+                "critical_regression": "🚨",
+                "high_regression": "⚠️",
+                "minor_regression": "⚠️",
+                "error": "❌",
+                "no_baseline": "❓",
+            }.get(summary["status"], "❓")
+
+            lines.append(
+                f"{status_icon} {test_name}: {summary['status'].replace('_', ' ').title()}"
+            )
+
+            if "execution_time_change" in summary:
+                lines.append(
+                    f" Performance: {summary['execution_time_change']:+.1f}%"
+                )
+            if "memory_change" in summary:
+                lines.append(f" Memory: {summary['memory_change']:+.1f}%")
+            if "issues_found" in summary:
+                lines.append(f" Issues: {summary['issues_found']}")
+        lines.append("")
+
+        # Recommendations
+        lines.append("RECOMMENDATIONS")
+        lines.append("-" * 18)
+
+        critical_issues = len(
+            [
+                i
+                for i in report.regression_issues
+                if i.severity == RegressionSeverity.CRITICAL
+            ]
+        )
+        high_issues = len(
+            [
+                i
+                for i in report.regression_issues
+                if i.severity == RegressionSeverity.HIGH
+            ]
+        )
+
+        if critical_issues > 0:
+            lines.append("🚨 CRITICAL: Migration should be rolled back immediately")
+            lines.append(
+                " - Critical regressions detected that affect system functionality"
+            )
+            lines.append(
+                " - Investigate and resolve issues before retrying migration"
+            )
+        elif high_issues > 0:
+            lines.append(
+                "⚠️ HIGH PRIORITY: Address high-severity issues before production"
+            )
+            lines.append(" - Significant regressions detected")
+            lines.append(" - Consider additional testing and optimization")
+        elif len(report.regression_issues) > 0:
+            lines.append("ℹ️ MINOR: Migration successful with minor regressions")
+            lines.append(" - Monitor system behavior in production")
+            lines.append(" - Consider performance optimizations")
+        else:
+            lines.append("✅ SUCCESS: Migration completed without regressions")
+            lines.append(" - System is performing as expected")
+            lines.append(" - Safe to proceed with production deployment")
+
+        return "\n".join(lines)
+
+    def _generate_json_report(self, report: RegressionReport) -> str:
+        """Generate JSON format regression report."""
+        data = {
+            "summary": {
+                "overall_status": report.overall_status,
+                "total_tests": report.total_tests,
+                "passed_tests": report.passed_tests,
+                "failed_tests": report.failed_tests,
+                "baseline_missing": len(report.baseline_missing),
+                "regression_issues": len(report.regression_issues),
+                "generated_at": report.generated_at.isoformat(),
+            },
+            "regression_issues": [
+                {
+                    "type": issue.regression_type,
+                    "severity": issue.severity,
+                    "test_name": issue.test_name,
+                    "description": issue.description,
+                    "expected_value": issue.expected_value,
+                    "actual_value": issue.actual_value,
+                    "threshold": issue.threshold,
+                    "deviation_percentage": issue.deviation_percentage,
+                    "metadata": issue.metadata,
+                }
+                for issue in report.regression_issues
+            ],
+            "test_summary": report.test_summary,
+            "baseline_missing": report.baseline_missing,
+        }
+
+        return json.dumps(data, indent=2, default=str)
+
+    def _generate_markdown_report(self, report: RegressionReport) -> str:
+        """Generate markdown format regression report."""
+        lines = []
+        lines.append("# LocalRuntime Regression Detection Report")
+        lines.append("")
+
+        # Status badge
+        status_emoji = {
+            "all_passed": "🟢",
+            "minor_regressions": "🟡",
+            "high_regressions": "🟠",
+            "critical_regressions": "🔴",
+            "test_failures": "🔴",
+            "missing_baselines": "⚪",
+        }
+
+        emoji = status_emoji.get(report.overall_status, "⚪")
+        lines.append(
+            f"## {emoji} Status: {report.overall_status.replace('_', ' ').title()}"
+        )
+        lines.append("")
+
+        # Summary table
+        lines.append("## Summary")
+        lines.append("")
+        lines.append("| Metric | Count |")
+        lines.append("|--------|-------|")
+        lines.append(f"| Total Tests | {report.total_tests} |")
+        lines.append(f"| Passed | {report.passed_tests} |")
+        lines.append(f"| Failed | {report.failed_tests} |")
+        lines.append(f"| Missing Baselines | {len(report.baseline_missing)} |")
+        lines.append(f"| Regression Issues | {len(report.regression_issues)} |")
+        lines.append("")
+
+        # Regression issues
+        if report.regression_issues:
+            lines.append("## Regression Issues")
+            lines.append("")
+
+            for severity in RegressionSeverity:
+                severity_issues = [
+                    i for i in report.regression_issues if i.severity == severity
+                ]
+                if severity_issues:
+                    severity_emoji = {
+                        RegressionSeverity.CRITICAL: "🚨",
+                        RegressionSeverity.HIGH: "⚠️",
+                        RegressionSeverity.MEDIUM: "⚠️",
+                        RegressionSeverity.LOW: "ℹ️",
+                    }[severity]
+
+                    lines.append(
+                        f"### {severity_emoji} {severity.value.title()} Issues"
+                    )
+                    lines.append("")
+
+                    for issue in severity_issues:
+                        lines.append(f"**{issue.test_name}**: {issue.description}")
+                        lines.append("")
+                        lines.append(f"- **Expected**: {issue.expected_value}")
+                        lines.append(f"- **Actual**: {issue.actual_value}")
+                        lines.append(
+                            f"- **Deviation**: {issue.deviation_percentage:.1f}%"
+                        )
+                        lines.append(f"- **Type**: {issue.regression_type}")
+                        lines.append("")
+
+        # Test results
+        lines.append("## Test Results")
+        lines.append("")
+        lines.append("| Test | Status | Performance Change | Memory Change |")
+        lines.append("|------|--------|-------------------|---------------|")
+
+        for test_name, summary in report.test_summary.items():
+            status_icon = {
+                "passed": "✅",
+                "critical_regression": "🚨",
+                "high_regression": "⚠️",
+                "minor_regression": "⚠️",
+                "error": "❌",
+                "no_baseline": "❓",
+            }.get(summary["status"], "❓")
+
+            perf_change = summary.get("execution_time_change", "N/A")
+            memory_change = summary.get("memory_change", "N/A")
+
+            if isinstance(perf_change, (int, float)):
+                perf_change = f"{perf_change:+.1f}%"
+            if isinstance(memory_change, (int, float)):
+                memory_change = f"{memory_change:+.1f}%"
+
+            lines.append(
+                f"| {test_name} | {status_icon} {summary['status'].replace('_', ' ').title()} | {perf_change} | {memory_change} |"
+            )
+
+        return "\n".join(lines)
+
+    def save_report(
+        self,
+        report: RegressionReport,
+        file_path: Union[str, Path],
+        format: str = "json",
+    ) -> None:
+        """Save regression report to file.
+
+        Args:
+            report: Regression report to save
+            file_path: Output file path
+            format: Report format ("text", "json", "markdown")
+        """
+        content = self.generate_regression_report(report, format)
+
+        with open(file_path, "w", encoding="utf-8") as f:
+            f.write(content)
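
For orientation, the following is a minimal usage sketch of the new kailash.migration.regression_detector module, based only on the signatures visible in the diff above. The LocalRuntime keyword arguments passed via the config dictionary and the output file names are illustrative placeholders, not values taken from the release.

from kailash.migration.regression_detector import RegressionDetector

# Thresholds mirror the constructor defaults shown in the diff.
detector = RegressionDetector(
    baseline_path="migration_baseline.json",
    performance_threshold=10.0,  # % execution-time degradation tolerated
    resource_threshold=20.0,     # % memory-usage increase tolerated
)

# Placeholder LocalRuntime configuration; the accepted keyword arguments
# depend on the LocalRuntime API and are not shown in this diff.
runtime_config = {}

# 1. Capture baseline snapshots with the pre-migration configuration.
detector.create_baseline(config=runtime_config)

# 2. After migrating, re-run the built-in test workflows and compare.
report = detector.detect_regressions(config=runtime_config)

# 3. Render the result in one of the supported formats and persist it.
print(detector.generate_regression_report(report, format="text"))
detector.save_report(report, "regression_report.md", format="markdown")

A report whose overall_status is anything other than "all_passed" indicates that at least one of the built-in test workflows regressed against the stored baseline, per _determine_overall_status in the diff above.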