crackerjack 0.33.0__py3-none-any.whl → 0.33.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.
Files changed (198)
  1. crackerjack/__main__.py +1350 -34
  2. crackerjack/adapters/__init__.py +17 -0
  3. crackerjack/adapters/lsp_client.py +358 -0
  4. crackerjack/adapters/rust_tool_adapter.py +194 -0
  5. crackerjack/adapters/rust_tool_manager.py +193 -0
  6. crackerjack/adapters/skylos_adapter.py +231 -0
  7. crackerjack/adapters/zuban_adapter.py +560 -0
  8. crackerjack/agents/base.py +7 -3
  9. crackerjack/agents/coordinator.py +271 -33
  10. crackerjack/agents/documentation_agent.py +9 -15
  11. crackerjack/agents/dry_agent.py +3 -15
  12. crackerjack/agents/formatting_agent.py +1 -1
  13. crackerjack/agents/import_optimization_agent.py +36 -180
  14. crackerjack/agents/performance_agent.py +17 -98
  15. crackerjack/agents/performance_helpers.py +7 -31
  16. crackerjack/agents/proactive_agent.py +1 -3
  17. crackerjack/agents/refactoring_agent.py +16 -85
  18. crackerjack/agents/refactoring_helpers.py +7 -42
  19. crackerjack/agents/security_agent.py +9 -48
  20. crackerjack/agents/test_creation_agent.py +356 -513
  21. crackerjack/agents/test_specialist_agent.py +0 -4
  22. crackerjack/api.py +6 -25
  23. crackerjack/cli/cache_handlers.py +204 -0
  24. crackerjack/cli/cache_handlers_enhanced.py +683 -0
  25. crackerjack/cli/facade.py +100 -0
  26. crackerjack/cli/handlers.py +224 -9
  27. crackerjack/cli/interactive.py +6 -4
  28. crackerjack/cli/options.py +642 -55
  29. crackerjack/cli/utils.py +2 -1
  30. crackerjack/code_cleaner.py +58 -117
  31. crackerjack/config/global_lock_config.py +8 -48
  32. crackerjack/config/hooks.py +53 -62
  33. crackerjack/core/async_workflow_orchestrator.py +24 -34
  34. crackerjack/core/autofix_coordinator.py +3 -17
  35. crackerjack/core/enhanced_container.py +4 -13
  36. crackerjack/core/file_lifecycle.py +12 -89
  37. crackerjack/core/performance.py +2 -2
  38. crackerjack/core/performance_monitor.py +15 -55
  39. crackerjack/core/phase_coordinator.py +104 -204
  40. crackerjack/core/resource_manager.py +14 -90
  41. crackerjack/core/service_watchdog.py +62 -95
  42. crackerjack/core/session_coordinator.py +149 -0
  43. crackerjack/core/timeout_manager.py +14 -72
  44. crackerjack/core/websocket_lifecycle.py +13 -78
  45. crackerjack/core/workflow_orchestrator.py +171 -174
  46. crackerjack/docs/INDEX.md +11 -0
  47. crackerjack/docs/generated/api/API_REFERENCE.md +10895 -0
  48. crackerjack/docs/generated/api/CLI_REFERENCE.md +109 -0
  49. crackerjack/docs/generated/api/CROSS_REFERENCES.md +1755 -0
  50. crackerjack/docs/generated/api/PROTOCOLS.md +3 -0
  51. crackerjack/docs/generated/api/SERVICES.md +1252 -0
  52. crackerjack/documentation/__init__.py +31 -0
  53. crackerjack/documentation/ai_templates.py +756 -0
  54. crackerjack/documentation/dual_output_generator.py +765 -0
  55. crackerjack/documentation/mkdocs_integration.py +518 -0
  56. crackerjack/documentation/reference_generator.py +977 -0
  57. crackerjack/dynamic_config.py +55 -50
  58. crackerjack/executors/async_hook_executor.py +10 -15
  59. crackerjack/executors/cached_hook_executor.py +117 -43
  60. crackerjack/executors/hook_executor.py +8 -34
  61. crackerjack/executors/hook_lock_manager.py +26 -183
  62. crackerjack/executors/individual_hook_executor.py +13 -11
  63. crackerjack/executors/lsp_aware_hook_executor.py +270 -0
  64. crackerjack/executors/tool_proxy.py +417 -0
  65. crackerjack/hooks/lsp_hook.py +79 -0
  66. crackerjack/intelligence/adaptive_learning.py +25 -10
  67. crackerjack/intelligence/agent_orchestrator.py +2 -5
  68. crackerjack/intelligence/agent_registry.py +34 -24
  69. crackerjack/intelligence/agent_selector.py +5 -7
  70. crackerjack/interactive.py +17 -6
  71. crackerjack/managers/async_hook_manager.py +0 -1
  72. crackerjack/managers/hook_manager.py +79 -1
  73. crackerjack/managers/publish_manager.py +44 -8
  74. crackerjack/managers/test_command_builder.py +1 -15
  75. crackerjack/managers/test_executor.py +1 -3
  76. crackerjack/managers/test_manager.py +98 -7
  77. crackerjack/managers/test_manager_backup.py +10 -9
  78. crackerjack/mcp/cache.py +2 -2
  79. crackerjack/mcp/client_runner.py +1 -1
  80. crackerjack/mcp/context.py +191 -68
  81. crackerjack/mcp/dashboard.py +7 -5
  82. crackerjack/mcp/enhanced_progress_monitor.py +31 -28
  83. crackerjack/mcp/file_monitor.py +30 -23
  84. crackerjack/mcp/progress_components.py +31 -21
  85. crackerjack/mcp/progress_monitor.py +50 -53
  86. crackerjack/mcp/rate_limiter.py +6 -6
  87. crackerjack/mcp/server_core.py +17 -16
  88. crackerjack/mcp/service_watchdog.py +2 -1
  89. crackerjack/mcp/state.py +4 -7
  90. crackerjack/mcp/task_manager.py +11 -9
  91. crackerjack/mcp/tools/core_tools.py +173 -32
  92. crackerjack/mcp/tools/error_analyzer.py +3 -2
  93. crackerjack/mcp/tools/execution_tools.py +8 -10
  94. crackerjack/mcp/tools/execution_tools_backup.py +42 -30
  95. crackerjack/mcp/tools/intelligence_tool_registry.py +7 -5
  96. crackerjack/mcp/tools/intelligence_tools.py +5 -2
  97. crackerjack/mcp/tools/monitoring_tools.py +33 -70
  98. crackerjack/mcp/tools/proactive_tools.py +24 -11
  99. crackerjack/mcp/tools/progress_tools.py +5 -8
  100. crackerjack/mcp/tools/utility_tools.py +20 -14
  101. crackerjack/mcp/tools/workflow_executor.py +62 -40
  102. crackerjack/mcp/websocket/app.py +8 -0
  103. crackerjack/mcp/websocket/endpoints.py +352 -357
  104. crackerjack/mcp/websocket/jobs.py +40 -57
  105. crackerjack/mcp/websocket/monitoring_endpoints.py +2935 -0
  106. crackerjack/mcp/websocket/server.py +7 -25
  107. crackerjack/mcp/websocket/websocket_handler.py +6 -17
  108. crackerjack/mixins/__init__.py +0 -2
  109. crackerjack/mixins/error_handling.py +1 -70
  110. crackerjack/models/config.py +12 -1
  111. crackerjack/models/config_adapter.py +49 -1
  112. crackerjack/models/protocols.py +122 -122
  113. crackerjack/models/resource_protocols.py +55 -210
  114. crackerjack/monitoring/ai_agent_watchdog.py +13 -13
  115. crackerjack/monitoring/metrics_collector.py +426 -0
  116. crackerjack/monitoring/regression_prevention.py +8 -8
  117. crackerjack/monitoring/websocket_server.py +643 -0
  118. crackerjack/orchestration/advanced_orchestrator.py +11 -6
  119. crackerjack/orchestration/coverage_improvement.py +3 -3
  120. crackerjack/orchestration/execution_strategies.py +26 -6
  121. crackerjack/orchestration/test_progress_streamer.py +8 -5
  122. crackerjack/plugins/base.py +2 -2
  123. crackerjack/plugins/hooks.py +7 -0
  124. crackerjack/plugins/managers.py +11 -8
  125. crackerjack/security/__init__.py +0 -1
  126. crackerjack/security/audit.py +6 -35
  127. crackerjack/services/anomaly_detector.py +392 -0
  128. crackerjack/services/api_extractor.py +615 -0
  129. crackerjack/services/backup_service.py +2 -2
  130. crackerjack/services/bounded_status_operations.py +15 -152
  131. crackerjack/services/cache.py +127 -1
  132. crackerjack/services/changelog_automation.py +395 -0
  133. crackerjack/services/config.py +15 -9
  134. crackerjack/services/config_merge.py +19 -80
  135. crackerjack/services/config_template.py +506 -0
  136. crackerjack/services/contextual_ai_assistant.py +48 -22
  137. crackerjack/services/coverage_badge_service.py +171 -0
  138. crackerjack/services/coverage_ratchet.py +27 -25
  139. crackerjack/services/debug.py +3 -3
  140. crackerjack/services/dependency_analyzer.py +460 -0
  141. crackerjack/services/dependency_monitor.py +14 -11
  142. crackerjack/services/documentation_generator.py +491 -0
  143. crackerjack/services/documentation_service.py +675 -0
  144. crackerjack/services/enhanced_filesystem.py +6 -5
  145. crackerjack/services/enterprise_optimizer.py +865 -0
  146. crackerjack/services/error_pattern_analyzer.py +676 -0
  147. crackerjack/services/file_hasher.py +1 -1
  148. crackerjack/services/git.py +8 -25
  149. crackerjack/services/health_metrics.py +10 -8
  150. crackerjack/services/heatmap_generator.py +735 -0
  151. crackerjack/services/initialization.py +11 -30
  152. crackerjack/services/input_validator.py +5 -97
  153. crackerjack/services/intelligent_commit.py +327 -0
  154. crackerjack/services/log_manager.py +15 -12
  155. crackerjack/services/logging.py +4 -3
  156. crackerjack/services/lsp_client.py +628 -0
  157. crackerjack/services/memory_optimizer.py +19 -87
  158. crackerjack/services/metrics.py +42 -33
  159. crackerjack/services/parallel_executor.py +9 -67
  160. crackerjack/services/pattern_cache.py +1 -1
  161. crackerjack/services/pattern_detector.py +6 -6
  162. crackerjack/services/performance_benchmarks.py +18 -59
  163. crackerjack/services/performance_cache.py +20 -81
  164. crackerjack/services/performance_monitor.py +27 -95
  165. crackerjack/services/predictive_analytics.py +510 -0
  166. crackerjack/services/quality_baseline.py +234 -0
  167. crackerjack/services/quality_baseline_enhanced.py +646 -0
  168. crackerjack/services/quality_intelligence.py +785 -0
  169. crackerjack/services/regex_patterns.py +618 -524
  170. crackerjack/services/regex_utils.py +43 -123
  171. crackerjack/services/secure_path_utils.py +5 -164
  172. crackerjack/services/secure_status_formatter.py +30 -141
  173. crackerjack/services/secure_subprocess.py +11 -92
  174. crackerjack/services/security.py +9 -41
  175. crackerjack/services/security_logger.py +12 -24
  176. crackerjack/services/server_manager.py +124 -16
  177. crackerjack/services/status_authentication.py +16 -159
  178. crackerjack/services/status_security_manager.py +4 -131
  179. crackerjack/services/thread_safe_status_collector.py +19 -125
  180. crackerjack/services/unified_config.py +21 -13
  181. crackerjack/services/validation_rate_limiter.py +5 -54
  182. crackerjack/services/version_analyzer.py +459 -0
  183. crackerjack/services/version_checker.py +1 -1
  184. crackerjack/services/websocket_resource_limiter.py +10 -144
  185. crackerjack/services/zuban_lsp_service.py +390 -0
  186. crackerjack/slash_commands/__init__.py +2 -7
  187. crackerjack/slash_commands/run.md +2 -2
  188. crackerjack/tools/validate_input_validator_patterns.py +14 -40
  189. crackerjack/tools/validate_regex_patterns.py +19 -48
  190. {crackerjack-0.33.0.dist-info → crackerjack-0.33.2.dist-info}/METADATA +196 -25
  191. crackerjack-0.33.2.dist-info/RECORD +229 -0
  192. crackerjack/CLAUDE.md +0 -207
  193. crackerjack/RULES.md +0 -380
  194. crackerjack/py313.py +0 -234
  195. crackerjack-0.33.0.dist-info/RECORD +0 -187
  196. {crackerjack-0.33.0.dist-info → crackerjack-0.33.2.dist-info}/WHEEL +0 -0
  197. {crackerjack-0.33.0.dist-info → crackerjack-0.33.2.dist-info}/entry_points.txt +0 -0
  198. {crackerjack-0.33.0.dist-info → crackerjack-0.33.2.dist-info}/licenses/LICENSE +0 -0
crackerjack/services/quality_intelligence.py (new file)
@@ -0,0 +1,785 @@
+"""Advanced ML-based quality intelligence with anomaly detection and predictive analytics."""
+
+import json
+import typing as t
+from dataclasses import dataclass, field
+from datetime import datetime
+from enum import Enum
+from pathlib import Path
+
+import numpy as np
+from scipy import stats
+
+from crackerjack.services.quality_baseline_enhanced import (
+    AlertSeverity,
+    EnhancedQualityBaselineService,
+    TrendDirection,
+)
+
+
+class AnomalyType(str, Enum):
+    """Types of anomalies that can be detected."""
+
+    SPIKE = "spike"  # Sudden increase in metrics
+    DROP = "drop"  # Sudden decrease in metrics
+    DRIFT = "drift"  # Gradual change over time
+    OSCILLATION = "oscillation"  # Unusual fluctuation patterns
+    OUTLIER = "outlier"  # Statistical outlier
+
+
+class PatternType(str, Enum):
+    """Types of patterns that can be identified."""
+
+    CYCLIC = "cyclic"  # Regular recurring patterns
+    SEASONAL = "seasonal"  # Time-based patterns
+    CORRELATION = "correlation"  # Metric correlation patterns
+    REGRESSION = "regression"  # Quality regression patterns
+    IMPROVEMENT = "improvement"  # Quality improvement patterns
+
+
+@dataclass
+class QualityAnomaly:
+    """Detected quality anomaly with ML confidence."""
+
+    anomaly_type: AnomalyType
+    metric_name: str
+    detected_at: datetime
+    confidence: float  # 0.0 to 1.0
+    severity: AlertSeverity
+    description: str
+    actual_value: float
+    expected_value: float
+    deviation_sigma: float  # Standard deviations from normal
+    context: dict[str, t.Any] = field(default_factory=dict[str, t.Any])
+
+    def to_dict(self) -> dict[str, t.Any]:
+        data = {
+            "anomaly_type": self.anomaly_type,
+            "metric_name": self.metric_name,
+            "detected_at": self.detected_at.isoformat(),
+            "confidence": self.confidence,
+            "severity": self.severity,
+            "description": self.description,
+            "actual_value": self.actual_value,
+            "expected_value": self.expected_value,
+            "deviation_sigma": self.deviation_sigma,
+            "context": self.context,
+        }
+        return data
+
+
+@dataclass
+class QualityPattern:
+    """Identified quality pattern with statistical analysis."""
+
+    pattern_type: PatternType
+    metric_names: list[str]
+    detected_at: datetime
+    confidence: float
+    description: str
+    period_days: int
+    correlation_strength: float  # For correlation patterns
+    trend_direction: TrendDirection
+    statistical_significance: float  # p-value
+    context: dict[str, t.Any] = field(default_factory=dict[str, t.Any])
+
+    def to_dict(self) -> dict[str, t.Any]:
+        return {
+            "pattern_type": self.pattern_type,
+            "metric_names": self.metric_names,
+            "detected_at": self.detected_at.isoformat(),
+            "confidence": self.confidence,
+            "description": self.description,
+            "period_days": self.period_days,
+            "correlation_strength": self.correlation_strength,
+            "trend_direction": self.trend_direction,
+            "statistical_significance": self.statistical_significance,
+            "context": self.context,
+        }
+
+
+@dataclass
+class QualityPrediction:
+    """Advanced quality prediction with confidence intervals."""
+
+    metric_name: str
+    predicted_value: float
+    confidence_lower: float
+    confidence_upper: float
+    confidence_level: float  # e.g., 0.95 for 95% confidence
+    prediction_horizon_days: int
+    prediction_method: str
+    created_at: datetime
+    factors: list[str] = field(default_factory=list)
+    risk_assessment: str = "low"  # low, medium, high
+
+    def to_dict(self) -> dict[str, t.Any]:
+        return {
+            "metric_name": self.metric_name,
+            "predicted_value": self.predicted_value,
+            "confidence_lower": self.confidence_lower,
+            "confidence_upper": self.confidence_upper,
+            "confidence_level": self.confidence_level,
+            "prediction_horizon_days": self.prediction_horizon_days,
+            "prediction_method": self.prediction_method,
+            "created_at": self.created_at.isoformat(),
+            "factors": self.factors,
+            "risk_assessment": self.risk_assessment,
+        }
+
+
+@dataclass
+class QualityInsights:
+    """Comprehensive quality insights with ML analysis."""
+
+    anomalies: list[QualityAnomaly]
+    patterns: list[QualityPattern]
+    predictions: list[QualityPrediction]
+    recommendations: list[str]
+    overall_health_score: float  # 0.0 to 1.0
+    risk_level: str  # low, medium, high, critical
+    generated_at: datetime = field(default_factory=datetime.now)
+
+    def to_dict(self) -> dict[str, t.Any]:
+        return {
+            "anomalies": [a.to_dict() for a in self.anomalies],
+            "patterns": [p.to_dict() for p in self.patterns],
+            "predictions": [p.to_dict() for p in self.predictions],
+            "recommendations": self.recommendations,
+            "overall_health_score": self.overall_health_score,
+            "risk_level": self.risk_level,
+            "generated_at": self.generated_at.isoformat(),
+        }
+
+
+class QualityIntelligenceService:
+    """Advanced ML-based quality intelligence service."""
+
+    def __init__(
+        self,
+        quality_service: EnhancedQualityBaselineService,
+        anomaly_sensitivity: float = 2.0,  # Standard deviations for anomaly detection
+        min_data_points: int = 10,
+    ) -> None:
+        self.quality_service = quality_service
+        self.anomaly_sensitivity = anomaly_sensitivity
+        self.min_data_points = min_data_points
+
+    def detect_anomalies(
+        self, days: int = 30, metrics: list[str] | None = None
+    ) -> list[QualityAnomaly]:
+        """Detect anomalies in quality metrics using statistical analysis."""
+        metrics = self._get_default_metrics() if metrics is None else metrics
+
+        baselines = self.quality_service.get_recent_baselines(limit=days * 2)
+        if len(baselines) < self.min_data_points:
+            return []
+
+        anomalies = []
+        for metric_name in metrics:
+            metric_anomalies = self._detect_metric_anomalies(metric_name, baselines)
+            anomalies.extend(metric_anomalies)
+
+        return anomalies
+
+    def _get_default_metrics(self) -> list[str]:
+        """Get default metrics list[t.Any] for anomaly detection."""
+        return [
+            "quality_score",
+            "coverage_percent",
+            "hook_failures",
+            "security_issues",
+            "type_errors",
+            "linting_issues",
+        ]
+
+    def _detect_metric_anomalies(
+        self, metric_name: str, baselines: list[t.Any]
+    ) -> list[QualityAnomaly]:
+        """Detect anomalies for a specific metric."""
+        values, timestamps = self._extract_metric_values(metric_name, baselines)
+
+        if len(values) < self.min_data_points:
+            return []
+
+        stats_data = self._calculate_statistical_metrics(values)
+        if stats_data is None:  # No variation
+            return []
+
+        return self._identify_outlier_anomalies(
+            metric_name, values, timestamps, stats_data
+        )
+
+    def _extract_metric_values(
+        self, metric_name: str, baselines: list[t.Any]
+    ) -> tuple[list[float], list[t.Any]]:
+        """Extract metric values and timestamps from baselines."""
+        values = []
+        timestamps = []
+
+        for baseline in baselines:
+            metric_value = self._get_baseline_metric_value(baseline, metric_name)
+            if metric_value is not None:
+                values.append(metric_value)
+                timestamps.append(baseline.timestamp)
+
+        return values, timestamps
+
+    def _get_baseline_metric_value(
+        self, baseline: t.Any, metric_name: str
+    ) -> float | None:
+        """Get metric value from baseline object."""
+        metric_mapping = {
+            "quality_score": baseline.quality_score,
+            "coverage_percent": baseline.coverage_percent,
+            "hook_failures": baseline.hook_failures,
+            "security_issues": baseline.security_issues,
+            "type_errors": baseline.type_errors,
+            "linting_issues": baseline.linting_issues,
+        }
+        return metric_mapping.get(metric_name)
+
+    def _calculate_statistical_metrics(
+        self, values: list[float]
+    ) -> dict[str, float] | None:
+        """Calculate statistical metrics for anomaly detection."""
+        values_array = np.array(values)
+        mean_val = np.mean(values_array)
+        std_val = np.std(values_array)
+
+        if std_val == 0:
+            return None  # No variation to detect anomalies
+
+        z_scores = np.abs((values_array - mean_val) / std_val)
+
+        return {
+            "mean": mean_val,
+            "std": std_val,
+            "z_scores": z_scores,
+            "values_array": values_array,
+        }
+
+    def _identify_outlier_anomalies(
+        self,
+        metric_name: str,
+        values: list[float],
+        timestamps: list[t.Any],
+        stats_data: dict[str, float],
+    ) -> list[QualityAnomaly]:
+        """Identify outlier anomalies based on z-scores."""
+        anomalies = []
+        z_scores = stats_data["z_scores"]
+        mean_val = stats_data["mean"]
+        std_val = stats_data["std"]
+
+        for i, (value, timestamp, z_score) in enumerate(
+            zip(values, timestamps, z_scores)
+        ):
+            if z_score > self.anomaly_sensitivity:
+                anomaly = self._create_anomaly_object(
+                    metric_name,
+                    value,
+                    timestamp,
+                    z_score,
+                    mean_val,
+                    std_val,
+                    i,
+                    len(values),
+                )
+                anomalies.append(anomaly)
+
+        return anomalies
+
+    def _create_anomaly_object(
+        self,
+        metric_name: str,
+        value: float,
+        timestamp: t.Any,
+        z_score: float,
+        mean_val: float,
+        std_val: float,
+        position: int,
+        data_points: int,
+    ) -> QualityAnomaly:
+        """Create QualityAnomaly object from detected outlier."""
+        anomaly_type, severity = self._determine_anomaly_type_and_severity(
+            value, mean_val, z_score
+        )
+        confidence = min(1.0, z_score / 4.0)  # Scale to 0-1
+
+        return QualityAnomaly(
+            anomaly_type=anomaly_type,
+            metric_name=metric_name,
+            detected_at=timestamp,
+            confidence=confidence,
+            severity=severity,
+            description=f"{metric_name} {anomaly_type} detected: {value:.2f} (expected ~{mean_val:.2f})",
+            actual_value=value,
+            expected_value=mean_val,
+            deviation_sigma=z_score,
+            context={
+                "metric_mean": mean_val,
+                "metric_std": std_val,
+                "data_points": data_points,
+                "position_in_series": position,
+            },
+        )
+
+    def _determine_anomaly_type_and_severity(
+        self, value: float, mean_val: float, z_score: float
+    ) -> tuple[AnomalyType, AlertSeverity]:
+        """Determine anomaly type and severity based on value and z-score."""
+        if value > mean_val:
+            anomaly_type = AnomalyType.SPIKE
+        else:
+            anomaly_type = AnomalyType.DROP
+
+        severity = AlertSeverity.CRITICAL if z_score > 3.0 else AlertSeverity.WARNING
+
+        return anomaly_type, severity
+
+    def identify_patterns(self, days: int = 60) -> list[QualityPattern]:
+        """Identify patterns in quality metrics using correlation and trend analysis."""
+        baselines = self.quality_service.get_recent_baselines(limit=days * 2)
+        if len(baselines) < self.min_data_points:
+            return []
+
+        metrics_data = self._extract_metrics_data(baselines)
+        return self._find_correlation_patterns(metrics_data, days)
+
+    def _extract_metrics_data(self, baselines: list[t.Any]) -> dict[str, list[float]]:
+        """Extract metric data from baselines for correlation analysis."""
+        metrics_data = {
+            "quality_score": [],
+            "coverage_percent": [],
+            "hook_failures": [],
+            "security_issues": [],
+            "type_errors": [],
+            "linting_issues": [],
+        }
+
+        for baseline in baselines:
+            metrics_data["quality_score"].append(baseline.quality_score)
+            metrics_data["coverage_percent"].append(baseline.coverage_percent)
+            metrics_data["hook_failures"].append(baseline.hook_failures)
+            metrics_data["security_issues"].append(baseline.security_issues)
+            metrics_data["type_errors"].append(baseline.type_errors)
+            metrics_data["linting_issues"].append(baseline.linting_issues)
+
+        return metrics_data
+
+    def _find_correlation_patterns(
+        self, metrics_data: dict[str, list[float]], days: int
+    ) -> list[QualityPattern]:
+        """Find correlation patterns between metrics."""
+        patterns = []
+        metric_names = list[t.Any](metrics_data.keys())
+
+        for i, metric1 in enumerate(metric_names):
+            for metric2 in metric_names[i + 1 :]:
+                pattern = self._analyze_metric_correlation(
+                    metric1, metric2, metrics_data, days
+                )
+                if pattern:
+                    patterns.append(pattern)
+
+        return patterns
+
+    def _analyze_metric_correlation(
+        self,
+        metric1: str,
+        metric2: str,
+        metrics_data: dict[str, list[float]],
+        days: int,
+    ) -> QualityPattern | None:
+        """Analyze correlation between two metrics."""
+        values1 = np.array(metrics_data[metric1])
+        values2 = np.array(metrics_data[metric2])
+
+        if len(values1) < self.min_data_points:
+            return None
+
+        correlation, p_value = stats.pearsonr(values1, values2)
+
+        # Strong correlation threshold
+        if abs(correlation) > 0.7 and p_value < 0.05:
+            return self._create_correlation_pattern(
+                metric1, metric2, correlation, p_value, values1, days
+            )
+
+        return None
+
+    def _create_correlation_pattern(
+        self,
+        metric1: str,
+        metric2: str,
+        correlation: float,
+        p_value: float,
+        values1: np.ndarray,
+        days: int,
+    ) -> QualityPattern:
+        """Create a quality pattern from correlation analysis."""
+        trend_dir, description = self._get_correlation_trend_and_description(
+            metric1, metric2, correlation
+        )
+
+        return QualityPattern(
+            pattern_type=PatternType.CORRELATION,
+            metric_names=[metric1, metric2],
+            detected_at=datetime.now(),
+            confidence=abs(correlation),
+            description=description,
+            period_days=days,
+            correlation_strength=abs(correlation),
+            trend_direction=trend_dir,
+            statistical_significance=p_value,
+            context={
+                "correlation_coefficient": correlation,
+                "sample_size": len(values1),
+                "strength": self._get_correlation_strength_label(correlation),
+            },
+        )
+
+    def _get_correlation_trend_and_description(
+        self, metric1: str, metric2: str, correlation: float
+    ) -> tuple[TrendDirection, str]:
+        """Get trend direction and description for correlation."""
+        if correlation > 0:
+            return (
+                TrendDirection.IMPROVING,
+                f"Strong positive correlation between {metric1} and {metric2}",
+            )
+        return (
+            TrendDirection.DECLINING,
+            f"Strong negative correlation between {metric1} and {metric2}",
+        )
+
+    def _get_correlation_strength_label(self, correlation: float) -> str:
+        """Get strength label for correlation coefficient."""
+        abs_corr = abs(correlation)
+        if abs_corr > 0.9:
+            return "very strong"
+        elif abs_corr > 0.7:
+            return "strong"
+        return "moderate"
+
+    def generate_advanced_predictions(
+        self, horizon_days: int = 14, confidence_level: float = 0.95
+    ) -> list[QualityPrediction]:
+        """Generate advanced predictions with confidence intervals."""
+        baselines = self.quality_service.get_recent_baselines(limit=90)
+        if len(baselines) < self.min_data_points:
+            return []
+
+        predictions = []
+        metrics = ["quality_score", "coverage_percent"]
+
+        for metric_name in metrics:
+            values, timestamps = self._extract_time_series(baselines, metric_name)
+
+            if len(values) < self.min_data_points:
+                continue
+
+            prediction = self._create_metric_prediction(
+                metric_name, values, horizon_days, confidence_level
+            )
+            predictions.append(prediction)
+
+        return predictions
+
+    def _extract_time_series(
+        self, baselines: list[t.Any], metric_name: str
+    ) -> tuple[list[t.Any], list[t.Any]]:
+        """Extract time series data for specified metric."""
+        values = []
+        timestamps = []
+
+        for baseline in baselines:
+            if metric_name == "quality_score":
+                values.append(baseline.quality_score)
+            elif metric_name == "coverage_percent":
+                values.append(baseline.coverage_percent)
+            timestamps.append(baseline.timestamp)
+
+        return values, timestamps
+
+    def _create_metric_prediction(
+        self,
+        metric_name: str,
+        values: list[t.Any],
+        horizon_days: int,
+        confidence_level: float,
+    ) -> QualityPrediction:
+        """Create prediction for a single metric."""
+        regression_results = self._perform_linear_regression(values, horizon_days)
+        confidence_bounds = self._calculate_confidence_interval(
+            values, regression_results, confidence_level
+        )
+        risk_level = self._assess_prediction_risk(
+            metric_name, regression_results["predicted_value"]
+        )
+
+        return QualityPrediction(
+            metric_name=metric_name,
+            predicted_value=float(regression_results["predicted_value"]),
+            confidence_lower=float(confidence_bounds["lower"]),
+            confidence_upper=float(confidence_bounds["upper"]),
+            confidence_level=confidence_level,
+            prediction_horizon_days=horizon_days,
+            prediction_method="linear_regression_with_confidence_intervals",
+            created_at=datetime.now(),
+            factors=["historical_trend", "statistical_analysis"],
+            risk_assessment=risk_level,
+        )
+
+    def _perform_linear_regression(
+        self, values: list[t.Any], horizon_days: int
+    ) -> dict[str, t.Any]:
+        """Perform linear regression and predict future value."""
+        values_array = np.array(values)
+        time_indices = np.arange(len(values))
+
+        slope, intercept, r_value, p_value, std_err = stats.linregress(
+            time_indices, values_array
+        )
+
+        future_index = len(values) + horizon_days
+        predicted_value = slope * future_index + intercept
+
+        return {
+            "slope": slope,
+            "intercept": intercept,
+            "predicted_value": predicted_value,
+            "time_indices": time_indices,
+            "values_array": values_array,
+            "horizon_days": horizon_days,
+        }
+
+    def _calculate_confidence_interval(
+        self,
+        values: list[t.Any],
+        regression_results: dict[str, t.Any],
+        confidence_level: float,
+    ) -> dict[str, t.Any]:
+        """Calculate confidence interval for prediction."""
+        slope = regression_results["slope"]
+        intercept = regression_results["intercept"]
+        time_indices = regression_results["time_indices"]
+        values_array = regression_results["values_array"]
+        predicted_value = regression_results["predicted_value"]
+
+        residuals = values_array - (slope * time_indices + intercept)
+        residual_std = np.std(residuals)
+
+        future_index = len(values) + regression_results["horizon_days"]
+        t_value = stats.t.ppf((1 + confidence_level) / 2, len(values) - 2)
+
+        margin_error = self._calculate_margin_error(
+            t_value, residual_std, len(values), future_index, time_indices
+        )
+
+        return {
+            "lower": predicted_value - margin_error,
+            "upper": predicted_value + margin_error,
+        }
+
+    def _calculate_margin_error(
+        self,
+        t_value: float,
+        residual_std: float,
+        n_values: int,
+        future_index: int,
+        time_indices: np.ndarray,
+    ) -> float:
+        """Calculate margin of error for confidence interval."""
+        mean_time: float = float(np.mean(time_indices))
+        sum_sq_diff: float = float(np.sum((time_indices - mean_time) ** 2))
+        numerator: float = (future_index - mean_time) ** 2
+
+        sqrt_term: float = float(np.sqrt(1 + 1 / n_values + numerator / sum_sq_diff))
+        return t_value * residual_std * sqrt_term
+
+    def _assess_prediction_risk(self, metric_name: str, predicted_value: float) -> str:
+        """Assess risk level based on predicted value."""
+        if metric_name == "quality_score":
+            return self._assess_quality_score_risk(predicted_value)
+        # coverage_percent
+        return self._assess_coverage_risk(predicted_value)
+
+    def _assess_quality_score_risk(self, predicted_value: float) -> str:
+        """Assess risk for quality score predictions."""
+        if predicted_value < 70:
+            return "critical"
+        elif predicted_value < 80:
+            return "high"
+        elif predicted_value < 90:
+            return "medium"
+        return "low"
+
+    def _assess_coverage_risk(self, predicted_value: float) -> str:
+        """Assess risk for coverage predictions."""
+        if predicted_value < 70:
+            return "high"
+        elif predicted_value < 85:
+            return "medium"
+        return "low"
+
+    def generate_ml_recommendations(
+        self,
+        anomalies: list[QualityAnomaly],
+        patterns: list[QualityPattern],
+        predictions: list[QualityPrediction],
+    ) -> list[str]:
+        """Generate intelligent recommendations based on ML analysis."""
+        recommendations = []
+
+        # Anomaly-based recommendations
+        critical_anomalies = [
+            a for a in anomalies if a.severity == AlertSeverity.CRITICAL
+        ]
+        if critical_anomalies:
+            recommendations.append(
+                f"🚨 CRITICAL: {len(critical_anomalies)} critical anomalies detected - immediate investigation required"
+            )
+
+        quality_drops = [
+            a
+            for a in anomalies
+            if a.anomaly_type == AnomalyType.DROP and a.metric_name == "quality_score"
+        ]
+        if quality_drops:
+            recommendations.append(
+                "📉 Quality score drops detected - review recent commits and implement quality gates"
+            )
+
+        # Pattern-based recommendations
+        declining_correlations = [
+            p for p in patterns if p.trend_direction == TrendDirection.DECLINING
+        ]
+        if declining_correlations:
+            recommendations.append(
+                f"⚠️ Negative quality correlations identified - investigate dependencies between {declining_correlations[0].metric_names}"
+            )
+
+        strong_patterns = [p for p in patterns if p.confidence > 0.8]
+        if strong_patterns:
+            recommendations.append(
+                "📊 Strong quality patterns detected - leverage insights for predictive quality management"
+            )
+
+        # Prediction-based recommendations
+        high_risk_predictions = [
+            p for p in predictions if p.risk_assessment in ("high", "critical")
+        ]
+        if high_risk_predictions:
+            metrics = [p.metric_name for p in high_risk_predictions]
+            recommendations.append(
+                f"🔮 High-risk quality forecast for {', '.join(metrics)} - proactive intervention recommended"
+            )
+
+        low_confidence_predictions = [
+            p for p in predictions if p.confidence_upper - p.confidence_lower > 20
+        ]
+        if low_confidence_predictions:
+            recommendations.append(
+                "📈 Wide prediction intervals detected - increase data collection frequency for better forecasting"
+            )
+
+        # General ML insights
+        if len(anomalies) > 5:
+            recommendations.append(
+                f"🤖 High anomaly frequency ({len(anomalies)}) suggests systemic quality issues - consider ML-based automated quality monitoring"
+            )
+
+        if not recommendations:
+            recommendations.append(
+                "✅ Quality metrics show stable patterns with no significant anomalies detected - maintain current practices"
+            )
+
+        return recommendations
+
+    def generate_comprehensive_insights(
+        self, analysis_days: int = 30, prediction_days: int = 14
+    ) -> QualityInsights:
+        """Generate comprehensive quality insights with ML analysis."""
+        # Collect all analysis results
+        anomalies = self.detect_anomalies(days=analysis_days)
+        patterns = self.identify_patterns(days=analysis_days * 2)
+        predictions = self.generate_advanced_predictions(horizon_days=prediction_days)
+        recommendations = self.generate_ml_recommendations(
+            anomalies, patterns, predictions
+        )
+
+        # Calculate derived metrics
+        health_score, risk_level = self._calculate_health_metrics(
+            anomalies, predictions
+        )
+
+        return QualityInsights(
+            anomalies=anomalies,
+            patterns=patterns,
+            predictions=predictions,
+            recommendations=recommendations,
+            overall_health_score=health_score,
+            risk_level=risk_level,
+        )
+
+    def _calculate_health_metrics(
+        self, anomalies: list[QualityAnomaly], predictions: list[QualityPrediction]
+    ) -> tuple[float, str]:
+        """Calculate overall health score and risk level."""
+        anomaly_counts = self._count_anomalies_by_severity(anomalies)
+        risk_prediction_count = self._count_high_risk_predictions(predictions)
+
+        health_score = self._compute_health_score(anomaly_counts, risk_prediction_count)
+        risk_level = self._determine_risk_level(health_score)
+
+        return health_score, risk_level
+
+    def _count_anomalies_by_severity(
+        self, anomalies: list[QualityAnomaly]
+    ) -> dict[str, int]:
+        """Count anomalies by severity level."""
+        return {
+            "critical": len(
+                [a for a in anomalies if a.severity == AlertSeverity.CRITICAL]
+            ),
+            "warning": len(
+                [a for a in anomalies if a.severity == AlertSeverity.WARNING]
+            ),
+        }
+
+    def _count_high_risk_predictions(self, predictions: list[QualityPrediction]) -> int:
+        """Count predictions with high or critical risk assessment."""
+        return len(
+            [p for p in predictions if p.risk_assessment in ("high", "critical")]
+        )
+
+    def _compute_health_score(
+        self, anomaly_counts: dict[str, int], risk_predictions: int
+    ) -> float:
+        """Compute health score based on anomalies and risk predictions."""
+        health_score = 1.0
+        health_score -= (
+            anomaly_counts["critical"] * 0.2
+        )  # Critical anomalies heavily impact health
+        health_score -= (
+            anomaly_counts["warning"] * 0.1
+        )  # Warning anomalies moderately impact health
+        health_score -= risk_predictions * 0.15  # High-risk predictions impact health
+        return max(0.0, min(1.0, health_score))
+
+    def _determine_risk_level(self, health_score: float) -> str:
+        """Determine overall risk level based on health score."""
+        if health_score < 0.5:
+            return "critical"
+        elif health_score < 0.7:
+            return "high"
+        elif health_score < 0.85:
+            return "medium"
+        return "low"
+
+    def export_insights(self, insights: QualityInsights, output_path: Path) -> None:
+        """Export quality insights to JSON file."""
+        with output_path.open("w") as f:
+            json.dump(insights.to_dict(), f, indent=2, default=str)
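
Despite the "ML-based" framing, the anomaly detector in this module is a classic z-score outlier test: a value is flagged when it sits more than anomaly_sensitivity standard deviations from the series mean, and escalates from WARNING to CRITICAL past 3 sigma. The same core check, reduced to a self-contained sketch (standalone code, not part of the package):

import numpy as np


def zscore_outliers(values: list[float], sensitivity: float = 2.0) -> list[int]:
    # Indices of values more than `sensitivity` sigma from the mean,
    # mirroring _calculate_statistical_metrics / _identify_outlier_anomalies.
    arr = np.array(values, dtype=float)
    std = arr.std()
    if std == 0:  # no variation: the service returns None and flags nothing
        return []
    z = np.abs((arr - arr.mean()) / std)
    return [i for i, score in enumerate(z) if score > sensitivity]


# The 50.0 reading sits ~2.9 sigma below an otherwise ~90 series.
print(zscore_outliers([90, 91, 89, 92, 90, 50, 91, 90, 89, 92]))  # [5]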
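
Pattern detection is likewise pairwise Pearson correlation over the same baseline series, kept only when |r| > 0.7 and p < 0.05 (via scipy.stats.pearsonr). The gate reduces to this illustrative fragment, with made-up series values:

from scipy import stats

# Two series that rise together; r comes out near 0.96.
r, p = stats.pearsonr(
    [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    [2, 4, 5, 4, 5, 7, 8, 9, 9, 12],
)
if abs(r) > 0.7 and p < 0.05:  # the module's "strong correlation" threshold
    print(f"strong correlation: r={r:.2f}, p={p:.3g}")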
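
The predictions are ordinary least-squares extrapolation with a Student-t prediction interval; the margin computed in _calculate_margin_error is the textbook term t * s * sqrt(1 + 1/n + (x* - x_bar)^2 / Sxx). A condensed sketch of the same math (again standalone, not the package API):

import numpy as np
from scipy import stats


def predict_with_interval(
    values: list[float], horizon: int, confidence: float = 0.95
) -> tuple[float, float, float]:
    # OLS fit over time indices, extrapolated `horizon` steps past the series.
    y = np.array(values, dtype=float)
    x = np.arange(len(y))
    slope, intercept, *_ = stats.linregress(x, y)

    future_x = len(y) + horizon
    predicted = slope * future_x + intercept

    # Prediction-interval margin, matching _calculate_margin_error.
    residual_std = np.std(y - (slope * x + intercept))
    t_val = stats.t.ppf((1 + confidence) / 2, len(y) - 2)
    sxx = np.sum((x - x.mean()) ** 2)
    margin = t_val * residual_std * np.sqrt(
        1 + 1 / len(y) + (future_x - x.mean()) ** 2 / sxx
    )
    return predicted, predicted - margin, predicted + margin


# Coverage trending up ~0.5 points per sample, forecast 14 steps ahead.
print(predict_with_interval([80, 80.5, 81, 81.7, 82, 82.4, 83, 83.6, 84, 84.2], 14))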
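
End to end, the service is driven through generate_comprehensive_insights and exported as JSON. A minimal usage sketch; the constructor arguments for EnhancedQualityBaselineService are not shown in this diff, so the zero-argument call below is an assumption:

from pathlib import Path

from crackerjack.services.quality_baseline_enhanced import (
    EnhancedQualityBaselineService,
)
from crackerjack.services.quality_intelligence import QualityIntelligenceService

# Assumed construction; the real signature may require storage paths etc.
baseline_service = EnhancedQualityBaselineService()

service = QualityIntelligenceService(
    quality_service=baseline_service,
    anomaly_sensitivity=2.0,  # flag values beyond 2 sigma
    min_data_points=10,  # analyses return [] on shorter histories
)

insights = service.generate_comprehensive_insights(
    analysis_days=30, prediction_days=14
)
service.export_insights(insights, Path("quality_insights.json"))
print(insights.risk_level, f"{insights.overall_health_score:.2f}")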