kailash 0.9.15__py3-none-any.whl → 0.9.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kailash/middleware/database/base_models.py +7 -1
- kailash/migration/__init__.py +30 -0
- kailash/migration/cli.py +340 -0
- kailash/migration/compatibility_checker.py +662 -0
- kailash/migration/configuration_validator.py +837 -0
- kailash/migration/documentation_generator.py +1828 -0
- kailash/migration/examples/__init__.py +5 -0
- kailash/migration/examples/complete_migration_example.py +692 -0
- kailash/migration/migration_assistant.py +715 -0
- kailash/migration/performance_comparator.py +760 -0
- kailash/migration/regression_detector.py +1141 -0
- kailash/migration/tests/__init__.py +6 -0
- kailash/migration/tests/test_compatibility_checker.py +403 -0
- kailash/migration/tests/test_integration.py +463 -0
- kailash/migration/tests/test_migration_assistant.py +397 -0
- kailash/migration/tests/test_performance_comparator.py +433 -0
- kailash/nodes/data/async_sql.py +1507 -6
- kailash/runtime/local.py +1255 -8
- kailash/runtime/monitoring/__init__.py +1 -0
- kailash/runtime/monitoring/runtime_monitor.py +780 -0
- kailash/runtime/resource_manager.py +3033 -0
- kailash/sdk_exceptions.py +21 -0
- kailash/workflow/cyclic_runner.py +18 -2
- {kailash-0.9.15.dist-info → kailash-0.9.16.dist-info}/METADATA +1 -1
- {kailash-0.9.15.dist-info → kailash-0.9.16.dist-info}/RECORD +30 -12
- {kailash-0.9.15.dist-info → kailash-0.9.16.dist-info}/WHEEL +0 -0
- {kailash-0.9.15.dist-info → kailash-0.9.16.dist-info}/entry_points.txt +0 -0
- {kailash-0.9.15.dist-info → kailash-0.9.16.dist-info}/licenses/LICENSE +0 -0
- {kailash-0.9.15.dist-info → kailash-0.9.16.dist-info}/licenses/NOTICE +0 -0
- {kailash-0.9.15.dist-info → kailash-0.9.16.dist-info}/top_level.txt +0 -0
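The bulk of this release is the new kailash.migration package; the full source of its regression detector follows below. As a rough orientation, here is a minimal usage sketch (illustrative only, inferred from the method signatures added in this file; the empty config dict stands in for whatever LocalRuntime keyword arguments the environment under test actually uses):

    from kailash.migration.regression_detector import RegressionDetector

    detector = RegressionDetector(baseline_path="migration_baseline.json")
    config = {}  # LocalRuntime keyword arguments for the environment under test

    # Before migrating: snapshot the built-in test workflows under the old setup.
    detector.create_baseline(config)

    # After migrating: re-run the same workflows and compare against the baseline.
    report = detector.detect_regressions(config)
    print(detector.generate_regression_report(report, format="text"))
    detector.save_report(report, "regression_report.json", format="json")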
kailash/migration/regression_detector.py (new file) @@ -0,0 +1,1141 @@
"""Regression detection system for post-migration validation.

This module provides comprehensive regression detection capabilities to identify
issues that may have been introduced during LocalRuntime migration, including
performance regressions, functional regressions, and configuration issues.
"""

import hashlib
import json
import pickle
import statistics
import threading
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from dataclasses import dataclass, field
from datetime import datetime, timezone
from enum import Enum
from pathlib import Path
from typing import Any, Dict, List, Optional, Set, Tuple, Union

from kailash.runtime.local import LocalRuntime
from kailash.workflow import Workflow
from kailash.workflow.builder import WorkflowBuilder

class RegressionType(str, Enum):
    """Types of regressions that can be detected."""

    PERFORMANCE = "performance"
    FUNCTIONAL = "functional"
    CONFIGURATION = "configuration"
    SECURITY = "security"
    RESOURCE = "resource"
    COMPATIBILITY = "compatibility"


class RegressionSeverity(str, Enum):
    """Severity levels for regression issues."""

    CRITICAL = "critical"  # System unusable
    HIGH = "high"  # Major functionality affected
    MEDIUM = "medium"  # Minor functionality affected
    LOW = "low"  # Cosmetic or edge case issues


@dataclass
class RegressionIssue:
    """Represents a detected regression."""

    regression_type: RegressionType
    severity: RegressionSeverity
    test_name: str
    description: str
    expected_value: Any
    actual_value: Any
    threshold: float
    deviation_percentage: float
    metadata: Dict[str, Any] = field(default_factory=dict)
    detected_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc))


@dataclass
class BaselineSnapshot:
    """Baseline snapshot for regression comparison."""

    test_name: str
    workflow_hash: str
    configuration: Dict[str, Any]
    results: Dict[str, Any]
    performance_metrics: Dict[str, float]
    resource_usage: Dict[str, float]
    timestamp: datetime
    metadata: Dict[str, Any] = field(default_factory=dict)


@dataclass
class RegressionReport:
    """Comprehensive regression detection report."""

    total_tests: int
    passed_tests: int
    failed_tests: int
    regression_issues: List[RegressionIssue] = field(default_factory=list)
    baseline_missing: List[str] = field(default_factory=list)
    test_summary: Dict[str, Dict] = field(default_factory=dict)
    overall_status: str = "unknown"
    generated_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc))

class RegressionDetector:
    """Comprehensive regression detection system."""

    def __init__(
        self,
        baseline_path: Union[str, Path] = "migration_baseline.json",
        performance_threshold: float = 10.0,  # % degradation threshold
        resource_threshold: float = 20.0,  # % resource increase threshold
        parallel_tests: bool = True,
        max_workers: int = 4,
    ):
        """Initialize the regression detector.

        Args:
            baseline_path: Path to store/load baseline data
            performance_threshold: Performance regression threshold (%)
            resource_threshold: Resource usage regression threshold (%)
            parallel_tests: Whether to run tests in parallel
            max_workers: Maximum number of parallel workers
        """
        self.baseline_path = Path(baseline_path)
        self.performance_threshold = performance_threshold
        self.resource_threshold = resource_threshold
        self.parallel_tests = parallel_tests
        self.max_workers = max_workers

        # Baseline data storage
        self.baselines: Dict[str, BaselineSnapshot] = {}
        self._load_baselines()

        # Test workflows for regression testing
        self.test_workflows = self._create_test_workflows()

        # Performance tracking
        self.performance_history: Dict[str, List[float]] = {}

        # Regression thresholds by type
        self.thresholds = {
            RegressionType.PERFORMANCE: performance_threshold,
            RegressionType.RESOURCE: resource_threshold,
            RegressionType.FUNCTIONAL: 0.0,  # Any functional change is a regression
            RegressionType.CONFIGURATION: 0.0,
            RegressionType.SECURITY: 0.0,
            RegressionType.COMPATIBILITY: 0.0,
        }

    def create_baseline(
        self,
        config: Dict[str, Any],
        custom_workflows: Optional[List[Tuple[str, Workflow]]] = None,
    ) -> Dict[str, BaselineSnapshot]:
        """Create baseline snapshots for regression detection.

        Args:
            config: LocalRuntime configuration to baseline
            custom_workflows: Optional custom workflows to include

        Returns:
            Dictionary of baseline snapshots
        """
        workflows = custom_workflows or self.test_workflows
        baselines = {}

        print(f"Creating baseline with {len(workflows)} test workflows...")

        for test_name, workflow in workflows:
            print(f"  Creating baseline for: {test_name}")
            baseline = self._create_baseline_snapshot(test_name, workflow, config)
            if baseline:
                baselines[test_name] = baseline
                self.baselines[test_name] = baseline

        # Save baselines to disk
        self._save_baselines()

        return baselines

    def detect_regressions(
        self,
        config: Dict[str, Any],
        custom_workflows: Optional[List[Tuple[str, Workflow]]] = None,
    ) -> RegressionReport:
        """Detect regressions by comparing against baseline.

        Args:
            config: Current LocalRuntime configuration
            custom_workflows: Optional custom workflows to test

        Returns:
            Comprehensive regression report
        """
        workflows = custom_workflows or self.test_workflows

        report = RegressionReport(
            total_tests=len(workflows), passed_tests=0, failed_tests=0
        )

        # Run tests and detect regressions
        if self.parallel_tests:
            self._run_parallel_regression_tests(workflows, config, report)
        else:
            self._run_sequential_regression_tests(workflows, config, report)

        # Analyze overall status
        report.overall_status = self._determine_overall_status(report)

        return report

    def _create_test_workflows(self) -> List[Tuple[str, Workflow]]:
        """Create standard test workflows for regression testing."""
        workflows = []

        # Simple execution test
        simple_builder = WorkflowBuilder()
        simple_builder.add_node(
            "PythonCodeNode",
            "simple",
            {"code": "result = 'hello_world'", "output_key": "message"},
        )
        workflows.append(("simple_execution", simple_builder.build()))

        # Performance test
        perf_builder = WorkflowBuilder()
        perf_builder.add_node(
            "PythonCodeNode",
            "performance",
            {
                "code": """
import time
start = time.time()
# Simulate work
result = sum(i*i for i in range(10000))
duration = time.time() - start
""",
                "output_key": "calculation_result",
            },
        )
        workflows.append(("performance_test", perf_builder.build()))

        # Memory test
        memory_builder = WorkflowBuilder()
        memory_builder.add_node(
            "PythonCodeNode",
            "memory",
            {
                "code": """
import gc
# Create and cleanup large object
large_data = [list(range(1000)) for _ in range(100)]
result = len(large_data)
del large_data
gc.collect()
""",
                "output_key": "memory_result",
            },
        )
        workflows.append(("memory_test", memory_builder.build()))

        # Error handling test
        error_builder = WorkflowBuilder()
        error_builder.add_node(
            "PythonCodeNode",
            "error_handling",
            {
                "code": """
try:
    result = 1 / 0
except ZeroDivisionError:
    result = "error_handled_correctly"
""",
                "output_key": "error_result",
            },
        )
        workflows.append(("error_handling_test", error_builder.build()))

        # Multi-node workflow test
        multi_builder = WorkflowBuilder()
        multi_builder.add_node(
            "PythonCodeNode",
            "step1",
            {"code": "result = [1, 2, 3, 4, 5]", "output_key": "numbers"},
        )
        multi_builder.add_node(
            "PythonCodeNode",
            "step2",
            {
                "code": "result = [x * 2 for x in numbers]",
                "input_mapping": {"numbers": "step1.numbers"},
                "output_key": "doubled",
            },
        )
        multi_builder.add_node(
            "PythonCodeNode",
            "step3",
            {
                "code": "result = sum(doubled)",
                "input_mapping": {"doubled": "step2.doubled"},
                "output_key": "sum_result",
            },
        )
        workflows.append(("multi_node_test", multi_builder.build()))

        # Configuration sensitivity test
        config_builder = WorkflowBuilder()
        config_builder.add_node(
            "PythonCodeNode",
            "config_test",
            {
                "code": """
import os
import threading
# Test configuration-sensitive operations
result = {
    'thread_id': threading.get_ident(),
    'process_id': os.getpid(),
    'environment_ready': True
}
""",
                "output_key": "config_result",
            },
        )
        workflows.append(("configuration_test", config_builder.build()))

        return workflows

    def _create_baseline_snapshot(
        self, test_name: str, workflow: Workflow, config: Dict[str, Any]
    ) -> Optional[BaselineSnapshot]:
        """Create a baseline snapshot for a single test."""
        try:
            # Create workflow hash for change detection
            workflow_hash = self._hash_workflow(workflow)

            # Run test multiple times for stable metrics
            runtime = LocalRuntime(**config)
            execution_times = []
            memory_usages = []
            results_history = []

            for _ in range(3):  # 3 runs for stability
                import psutil

                process = psutil.Process()

                # Measure before execution
                memory_before = process.memory_info().rss / 1024 / 1024  # MB

                # Execute workflow
                start_time = time.perf_counter()
                results, run_id = runtime.execute(workflow)
                end_time = time.perf_counter()

                # Measure after execution
                memory_after = process.memory_info().rss / 1024 / 1024  # MB

                execution_times.append((end_time - start_time) * 1000)  # ms
                memory_usages.append(memory_after - memory_before)
                results_history.append(results)

            # Calculate stable metrics
            avg_execution_time = statistics.mean(execution_times)
            avg_memory_usage = statistics.mean(memory_usages)

            # Use first result as baseline (assuming deterministic workflows)
            baseline_results = results_history[0]

            # Create snapshot
            snapshot = BaselineSnapshot(
                test_name=test_name,
                workflow_hash=workflow_hash,
                configuration=config.copy(),
                results=baseline_results,
                performance_metrics={
                    "execution_time_ms": avg_execution_time,
                    "execution_time_stddev": (
                        statistics.stdev(execution_times)
                        if len(execution_times) > 1
                        else 0.0
                    ),
                },
                resource_usage={
                    "memory_usage_mb": avg_memory_usage,
                    "memory_stddev": (
                        statistics.stdev(memory_usages)
                        if len(memory_usages) > 1
                        else 0.0
                    ),
                },
                timestamp=datetime.now(timezone.utc),
                metadata={
                    "runs": len(execution_times),
                    "config_hash": self._hash_config(config),
                },
            )

            return snapshot

        except Exception as e:
            print(f"Failed to create baseline for {test_name}: {str(e)}")
            return None

    def _run_parallel_regression_tests(
        self,
        workflows: List[Tuple[str, Workflow]],
        config: Dict[str, Any],
        report: RegressionReport,
    ) -> None:
        """Run regression tests in parallel."""
        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
            # Submit all test jobs
            future_to_test = {
                executor.submit(
                    self._run_single_regression_test, test_name, workflow, config
                ): test_name
                for test_name, workflow in workflows
            }

            # Collect results
            for future in as_completed(future_to_test):
                test_name = future_to_test[future]
                try:
                    test_result = future.result()
                    self._process_test_result(test_name, test_result, report)
                except Exception as e:
                    # Add test failure
                    report.failed_tests += 1
                    report.test_summary[test_name] = {
                        "status": "error",
                        "error": str(e),
                    }

    def _run_sequential_regression_tests(
        self,
        workflows: List[Tuple[str, Workflow]],
        config: Dict[str, Any],
        report: RegressionReport,
    ) -> None:
        """Run regression tests sequentially."""
        for test_name, workflow in workflows:
            try:
                test_result = self._run_single_regression_test(
                    test_name, workflow, config
                )
                self._process_test_result(test_name, test_result, report)
            except Exception as e:
                report.failed_tests += 1
                report.test_summary[test_name] = {"status": "error", "error": str(e)}

    def _run_single_regression_test(
        self, test_name: str, workflow: Workflow, config: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Run a single regression test and return results."""
        # Get baseline for comparison
        baseline = self.baselines.get(test_name)
        if not baseline:
            return {
                "status": "no_baseline",
                "message": f"No baseline found for test {test_name}",
            }

        # Check workflow consistency
        current_hash = self._hash_workflow(workflow)
        if current_hash != baseline.workflow_hash:
            return {
                "status": "workflow_changed",
                "message": "Workflow has changed since baseline creation",
                "baseline_hash": baseline.workflow_hash,
                "current_hash": current_hash,
            }

        # Run test
        runtime = LocalRuntime(**config)

        # Performance measurement
        import psutil

        process = psutil.Process()

        # Multiple runs for stability
        execution_times = []
        memory_usages = []
        results_history = []

        for _ in range(3):
            memory_before = process.memory_info().rss / 1024 / 1024

            start_time = time.perf_counter()
            results, run_id = runtime.execute(workflow)
            end_time = time.perf_counter()

            memory_after = process.memory_info().rss / 1024 / 1024

            execution_times.append((end_time - start_time) * 1000)
            memory_usages.append(memory_after - memory_before)
            results_history.append(results)

        # Calculate current metrics
        current_metrics = {
            "execution_time_ms": statistics.mean(execution_times),
            "memory_usage_mb": statistics.mean(memory_usages),
            "results": results_history[0],  # Use first result for consistency
        }

        return {
            "status": "completed",
            "baseline": baseline,
            "current": current_metrics,
            "runs": len(execution_times),
        }

    def _process_test_result(
        self, test_name: str, test_result: Dict[str, Any], report: RegressionReport
    ) -> None:
        """Process a single test result and detect regressions."""
        if test_result["status"] == "no_baseline":
            report.baseline_missing.append(test_name)
            report.test_summary[test_name] = test_result
            return

        if test_result["status"] != "completed":
            report.failed_tests += 1
            report.test_summary[test_name] = test_result
            return

        baseline = test_result["baseline"]
        current = test_result["current"]

        # Detect performance regressions
        performance_issues = self._detect_performance_regressions(
            test_name, baseline, current
        )
        report.regression_issues.extend(performance_issues)

        # Detect functional regressions
        functional_issues = self._detect_functional_regressions(
            test_name, baseline, current
        )
        report.regression_issues.extend(functional_issues)

        # Detect resource regressions
        resource_issues = self._detect_resource_regressions(
            test_name, baseline, current
        )
        report.regression_issues.extend(resource_issues)

        # Determine test status
        has_critical = any(
            issue.severity == RegressionSeverity.CRITICAL
            for issue in performance_issues + functional_issues + resource_issues
        )
        has_high = any(
            issue.severity == RegressionSeverity.HIGH
            for issue in performance_issues + functional_issues + resource_issues
        )

        if has_critical:
            test_status = "critical_regression"
            report.failed_tests += 1
        elif has_high:
            test_status = "high_regression"
            report.failed_tests += 1
        elif performance_issues or functional_issues or resource_issues:
            test_status = "minor_regression"
            report.passed_tests += 1
        else:
            test_status = "passed"
            report.passed_tests += 1

        report.test_summary[test_name] = {
            "status": test_status,
            "issues_found": len(
                performance_issues + functional_issues + resource_issues
            ),
            "execution_time_change": self._calculate_percentage_change(
                baseline.performance_metrics.get("execution_time_ms", 0),
                current["execution_time_ms"],
            ),
            "memory_change": self._calculate_percentage_change(
                baseline.resource_usage.get("memory_usage_mb", 0),
                current["memory_usage_mb"],
            ),
        }

    def _detect_performance_regressions(
        self, test_name: str, baseline: BaselineSnapshot, current: Dict[str, Any]
    ) -> List[RegressionIssue]:
        """Detect performance regressions."""
        issues = []

        # Execution time regression
        baseline_time = baseline.performance_metrics.get("execution_time_ms", 0)
        current_time = current["execution_time_ms"]
        time_change = self._calculate_percentage_change(baseline_time, current_time)

        if time_change > self.performance_threshold:
            severity = self._determine_severity(time_change, self.performance_threshold)
            issues.append(
                RegressionIssue(
                    regression_type=RegressionType.PERFORMANCE,
                    severity=severity,
                    test_name=test_name,
                    description=f"Execution time increased by {time_change:.1f}%",
                    expected_value=baseline_time,
                    actual_value=current_time,
                    threshold=self.performance_threshold,
                    deviation_percentage=time_change,
                    metadata={"metric": "execution_time_ms"},
                )
            )

        return issues

    def _detect_functional_regressions(
        self, test_name: str, baseline: BaselineSnapshot, current: Dict[str, Any]
    ) -> List[RegressionIssue]:
        """Detect functional regressions."""
        issues = []

        baseline_results = baseline.results
        current_results = current["results"]

        # Deep comparison of results
        differences = self._deep_compare_results(baseline_results, current_results)

        for diff in differences:
            # Any functional change is considered a regression
            severity = (
                RegressionSeverity.HIGH
                if diff["critical"]
                else RegressionSeverity.MEDIUM
            )

            issues.append(
                RegressionIssue(
                    regression_type=RegressionType.FUNCTIONAL,
                    severity=severity,
                    test_name=test_name,
                    description=f"Functional change detected: {diff['description']}",
                    expected_value=diff["expected"],
                    actual_value=diff["actual"],
                    threshold=0.0,
                    deviation_percentage=100.0,  # Functional changes are 100% different
                    metadata={"path": diff["path"], "change_type": diff["type"]},
                )
            )

        return issues

    def _detect_resource_regressions(
        self, test_name: str, baseline: BaselineSnapshot, current: Dict[str, Any]
    ) -> List[RegressionIssue]:
        """Detect resource usage regressions."""
        issues = []

        # Memory usage regression
        baseline_memory = baseline.resource_usage.get("memory_usage_mb", 0)
        current_memory = current["memory_usage_mb"]
        memory_change = self._calculate_percentage_change(
            baseline_memory, current_memory
        )

        if memory_change > self.resource_threshold:
            severity = self._determine_severity(memory_change, self.resource_threshold)
            issues.append(
                RegressionIssue(
                    regression_type=RegressionType.RESOURCE,
                    severity=severity,
                    test_name=test_name,
                    description=f"Memory usage increased by {memory_change:.1f}%",
                    expected_value=baseline_memory,
                    actual_value=current_memory,
                    threshold=self.resource_threshold,
                    deviation_percentage=memory_change,
                    metadata={"metric": "memory_usage_mb"},
                )
            )

        return issues

    def _calculate_percentage_change(self, baseline: float, current: float) -> float:
        """Calculate percentage change from baseline."""
        if baseline == 0:
            return 0.0 if current == 0 else float("inf")
        return ((current - baseline) / baseline) * 100

    def _determine_severity(
        self, change_percentage: float, threshold: float
    ) -> RegressionSeverity:
        """Determine severity based on change percentage."""
        if change_percentage >= threshold * 4:
            return RegressionSeverity.CRITICAL
        elif change_percentage >= threshold * 2:
            return RegressionSeverity.HIGH
        elif change_percentage >= threshold:
            return RegressionSeverity.MEDIUM
        else:
            return RegressionSeverity.LOW

    def _deep_compare_results(
        self, baseline: Dict, current: Dict
    ) -> List[Dict[str, Any]]:
        """Deep comparison of result dictionaries."""
        differences = []

        def compare_recursive(base_obj, curr_obj, path=""):
            if type(base_obj) != type(curr_obj):
                differences.append(
                    {
                        "path": path,
                        "type": "type_change",
                        "description": f"Type changed from {type(base_obj).__name__} to {type(curr_obj).__name__}",
                        "expected": type(base_obj).__name__,
                        "actual": type(curr_obj).__name__,
                        "critical": True,
                    }
                )
                return

            if isinstance(base_obj, dict):
                # Check for missing keys
                base_keys = set(base_obj.keys())
                curr_keys = set(curr_obj.keys())

                missing_keys = base_keys - curr_keys
                new_keys = curr_keys - base_keys

                for key in missing_keys:
                    differences.append(
                        {
                            "path": f"{path}.{key}" if path else key,
                            "type": "missing_key",
                            "description": f'Key "{key}" missing from results',
                            "expected": base_obj[key],
                            "actual": None,
                            "critical": True,
                        }
                    )

                for key in new_keys:
                    differences.append(
                        {
                            "path": f"{path}.{key}" if path else key,
                            "type": "new_key",
                            "description": f'Unexpected key "{key}" in results',
                            "expected": None,
                            "actual": curr_obj[key],
                            "critical": False,
                        }
                    )

                # Compare common keys
                for key in base_keys & curr_keys:
                    new_path = f"{path}.{key}" if path else key
                    compare_recursive(base_obj[key], curr_obj[key], new_path)

            elif isinstance(base_obj, list):
                if len(base_obj) != len(curr_obj):
                    differences.append(
                        {
                            "path": path,
                            "type": "length_change",
                            "description": f"List length changed from {len(base_obj)} to {len(curr_obj)}",
                            "expected": len(base_obj),
                            "actual": len(curr_obj),
                            "critical": True,
                        }
                    )
                    return

                for i, (base_item, curr_item) in enumerate(zip(base_obj, curr_obj)):
                    compare_recursive(base_item, curr_item, f"{path}[{i}]")

            else:
                # Compare primitive values
                if base_obj != curr_obj:
                    differences.append(
                        {
                            "path": path,
                            "type": "value_change",
                            "description": f'Value changed from "{base_obj}" to "{curr_obj}"',
                            "expected": base_obj,
                            "actual": curr_obj,
                            "critical": False,
                        }
                    )

        compare_recursive(baseline, current)
        return differences

    def _determine_overall_status(self, report: RegressionReport) -> str:
        """Determine overall status from report."""
        critical_issues = len(
            [
                i
                for i in report.regression_issues
                if i.severity == RegressionSeverity.CRITICAL
            ]
        )
        high_issues = len(
            [
                i
                for i in report.regression_issues
                if i.severity == RegressionSeverity.HIGH
            ]
        )

        if critical_issues > 0:
            return "critical_regressions"
        elif high_issues > 0:
            return "high_regressions"
        elif len(report.regression_issues) > 0:
            return "minor_regressions"
        elif report.failed_tests > 0:
            return "test_failures"
        elif len(report.baseline_missing) > 0:
            return "missing_baselines"
        else:
            return "all_passed"

    def _hash_workflow(self, workflow: Workflow) -> str:
        """Create a hash of the workflow for change detection."""
        # Convert workflow to a deterministic string representation
        workflow_str = json.dumps(workflow.to_dict(), sort_keys=True)
        return hashlib.sha256(workflow_str.encode()).hexdigest()[:16]

    def _hash_config(self, config: Dict[str, Any]) -> str:
        """Create a hash of the configuration."""
        config_str = json.dumps(config, sort_keys=True, default=str)
        return hashlib.sha256(config_str.encode()).hexdigest()[:16]

    def _save_baselines(self) -> None:
        """Save baselines to disk."""
        baseline_data = {}
        for test_name, baseline in self.baselines.items():
            baseline_data[test_name] = {
                "test_name": baseline.test_name,
                "workflow_hash": baseline.workflow_hash,
                "configuration": baseline.configuration,
                "results": baseline.results,
                "performance_metrics": baseline.performance_metrics,
                "resource_usage": baseline.resource_usage,
                "timestamp": baseline.timestamp.isoformat(),
                "metadata": baseline.metadata,
            }

        with open(self.baseline_path, "w") as f:
            json.dump(baseline_data, f, indent=2, default=str)

    def _load_baselines(self) -> None:
        """Load baselines from disk."""
        if not self.baseline_path.exists():
            return

        try:
            with open(self.baseline_path, "r") as f:
                baseline_data = json.load(f)

            for test_name, data in baseline_data.items():
                baseline = BaselineSnapshot(
                    test_name=data["test_name"],
                    workflow_hash=data["workflow_hash"],
                    configuration=data["configuration"],
                    results=data["results"],
                    performance_metrics=data["performance_metrics"],
                    resource_usage=data["resource_usage"],
                    timestamp=datetime.fromisoformat(data["timestamp"]),
                    metadata=data["metadata"],
                )
                self.baselines[test_name] = baseline

        except Exception as e:
            print(f"Failed to load baselines: {str(e)}")

    def generate_regression_report(
        self, report: RegressionReport, format: str = "text"
    ) -> str:
        """Generate a comprehensive regression report.

        Args:
            report: Regression detection report
            format: Report format ("text", "json", "markdown")

        Returns:
            Formatted regression report
        """
        if format == "json":
            return self._generate_json_report(report)
        elif format == "markdown":
            return self._generate_markdown_report(report)
        else:
            return self._generate_text_report(report)

    def _generate_text_report(self, report: RegressionReport) -> str:
        """Generate text format regression report."""
        lines = []
        lines.append("=" * 60)
        lines.append("LocalRuntime Regression Detection Report")
        lines.append("=" * 60)
        lines.append("")

        # Executive summary
        lines.append("EXECUTIVE SUMMARY")
        lines.append("-" * 20)
        lines.append(
            f"Overall Status: {report.overall_status.upper().replace('_', ' ')}"
        )
        lines.append(f"Total Tests: {report.total_tests}")
        lines.append(f"Passed: {report.passed_tests}")
        lines.append(f"Failed: {report.failed_tests}")
        lines.append(f"Missing Baselines: {len(report.baseline_missing)}")
        lines.append(f"Regression Issues: {len(report.regression_issues)}")
        lines.append("")

        # Regression issues by severity
        if report.regression_issues:
            lines.append("REGRESSION ISSUES")
            lines.append("-" * 20)

            for severity in RegressionSeverity:
                severity_issues = [
                    i for i in report.regression_issues if i.severity == severity
                ]
                if severity_issues:
                    lines.append(
                        f"\n{severity.value.upper()} ISSUES ({len(severity_issues)}):"
                    )
                    for issue in severity_issues:
                        lines.append(f"  • {issue.test_name}: {issue.description}")
                        lines.append(f"    Expected: {issue.expected_value}")
                        lines.append(f"    Actual: {issue.actual_value}")
                        lines.append(
                            f"    Deviation: {issue.deviation_percentage:.1f}%"
                        )
            lines.append("")

        # Test summary
        lines.append("TEST SUMMARY")
        lines.append("-" * 15)
        for test_name, summary in report.test_summary.items():
            status_icon = {
                "passed": "✅",
                "critical_regression": "🚨",
                "high_regression": "⚠️",
                "minor_regression": "⚠️",
                "error": "❌",
                "no_baseline": "❓",
            }.get(summary["status"], "❓")

            lines.append(
                f"{status_icon} {test_name}: {summary['status'].replace('_', ' ').title()}"
            )

            if "execution_time_change" in summary:
                lines.append(
                    f"   Performance: {summary['execution_time_change']:+.1f}%"
                )
            if "memory_change" in summary:
                lines.append(f"   Memory: {summary['memory_change']:+.1f}%")
            if "issues_found" in summary:
                lines.append(f"   Issues: {summary['issues_found']}")
        lines.append("")

        # Recommendations
        lines.append("RECOMMENDATIONS")
        lines.append("-" * 18)

        critical_issues = len(
            [
                i
                for i in report.regression_issues
                if i.severity == RegressionSeverity.CRITICAL
            ]
        )
        high_issues = len(
            [
                i
                for i in report.regression_issues
                if i.severity == RegressionSeverity.HIGH
            ]
        )

        if critical_issues > 0:
            lines.append("🚨 CRITICAL: Migration should be rolled back immediately")
            lines.append(
                "   - Critical regressions detected that affect system functionality"
            )
            lines.append(
                "   - Investigate and resolve issues before retrying migration"
            )
        elif high_issues > 0:
            lines.append(
                "⚠️ HIGH PRIORITY: Address high-severity issues before production"
            )
            lines.append("   - Significant regressions detected")
            lines.append("   - Consider additional testing and optimization")
        elif len(report.regression_issues) > 0:
            lines.append("ℹ️ MINOR: Migration successful with minor regressions")
            lines.append("   - Monitor system behavior in production")
            lines.append("   - Consider performance optimizations")
        else:
            lines.append("✅ SUCCESS: Migration completed without regressions")
            lines.append("   - System is performing as expected")
            lines.append("   - Safe to proceed with production deployment")

        return "\n".join(lines)

    def _generate_json_report(self, report: RegressionReport) -> str:
        """Generate JSON format regression report."""
        data = {
            "summary": {
                "overall_status": report.overall_status,
                "total_tests": report.total_tests,
                "passed_tests": report.passed_tests,
                "failed_tests": report.failed_tests,
                "baseline_missing": len(report.baseline_missing),
                "regression_issues": len(report.regression_issues),
                "generated_at": report.generated_at.isoformat(),
            },
            "regression_issues": [
                {
                    "type": issue.regression_type,
                    "severity": issue.severity,
                    "test_name": issue.test_name,
                    "description": issue.description,
                    "expected_value": issue.expected_value,
                    "actual_value": issue.actual_value,
                    "threshold": issue.threshold,
                    "deviation_percentage": issue.deviation_percentage,
                    "metadata": issue.metadata,
                }
                for issue in report.regression_issues
            ],
            "test_summary": report.test_summary,
            "baseline_missing": report.baseline_missing,
        }

        return json.dumps(data, indent=2, default=str)

    def _generate_markdown_report(self, report: RegressionReport) -> str:
        """Generate markdown format regression report."""
        lines = []
        lines.append("# LocalRuntime Regression Detection Report")
        lines.append("")

        # Status badge
        status_emoji = {
            "all_passed": "🟢",
            "minor_regressions": "🟡",
            "high_regressions": "🟠",
            "critical_regressions": "🔴",
            "test_failures": "🔴",
            "missing_baselines": "⚪",
        }

        emoji = status_emoji.get(report.overall_status, "⚪")
        lines.append(
            f"## {emoji} Status: {report.overall_status.replace('_', ' ').title()}"
        )
        lines.append("")

        # Summary table
        lines.append("## Summary")
        lines.append("")
        lines.append("| Metric | Count |")
        lines.append("|--------|-------|")
        lines.append(f"| Total Tests | {report.total_tests} |")
        lines.append(f"| Passed | {report.passed_tests} |")
        lines.append(f"| Failed | {report.failed_tests} |")
        lines.append(f"| Missing Baselines | {len(report.baseline_missing)} |")
        lines.append(f"| Regression Issues | {len(report.regression_issues)} |")
        lines.append("")

        # Regression issues
        if report.regression_issues:
            lines.append("## Regression Issues")
            lines.append("")

            for severity in RegressionSeverity:
                severity_issues = [
                    i for i in report.regression_issues if i.severity == severity
                ]
                if severity_issues:
                    severity_emoji = {
                        RegressionSeverity.CRITICAL: "🚨",
                        RegressionSeverity.HIGH: "⚠️",
                        RegressionSeverity.MEDIUM: "⚠️",
                        RegressionSeverity.LOW: "ℹ️",
                    }[severity]

                    lines.append(
                        f"### {severity_emoji} {severity.value.title()} Issues"
                    )
                    lines.append("")

                    for issue in severity_issues:
                        lines.append(f"**{issue.test_name}**: {issue.description}")
                        lines.append("")
                        lines.append(f"- **Expected**: {issue.expected_value}")
                        lines.append(f"- **Actual**: {issue.actual_value}")
                        lines.append(
                            f"- **Deviation**: {issue.deviation_percentage:.1f}%"
                        )
                        lines.append(f"- **Type**: {issue.regression_type}")
                        lines.append("")

        # Test results
        lines.append("## Test Results")
        lines.append("")
        lines.append("| Test | Status | Performance Change | Memory Change |")
        lines.append("|------|--------|-------------------|---------------|")

        for test_name, summary in report.test_summary.items():
            status_icon = {
                "passed": "✅",
                "critical_regression": "🚨",
                "high_regression": "⚠️",
                "minor_regression": "⚠️",
                "error": "❌",
                "no_baseline": "❓",
            }.get(summary["status"], "❓")

            perf_change = summary.get("execution_time_change", "N/A")
            memory_change = summary.get("memory_change", "N/A")

            if isinstance(perf_change, (int, float)):
                perf_change = f"{perf_change:+.1f}%"
            if isinstance(memory_change, (int, float)):
                memory_change = f"{memory_change:+.1f}%"

            lines.append(
                f"| {test_name} | {status_icon} {summary['status'].replace('_', ' ').title()} | {perf_change} | {memory_change} |"
            )

        return "\n".join(lines)

    def save_report(
        self,
        report: RegressionReport,
        file_path: Union[str, Path],
        format: str = "json",
    ) -> None:
        """Save regression report to file.

        Args:
            report: Regression report to save
            file_path: Output file path
            format: Report format ("text", "json", "markdown")
        """
        content = self.generate_regression_report(report, format)

        with open(file_path, "w", encoding="utf-8") as f:
            f.write(content)