ml4t-diagnostic 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (242)
  1. ml4t/diagnostic/AGENT.md +25 -0
  2. ml4t/diagnostic/__init__.py +166 -0
  3. ml4t/diagnostic/backends/__init__.py +10 -0
  4. ml4t/diagnostic/backends/adapter.py +192 -0
  5. ml4t/diagnostic/backends/polars_backend.py +899 -0
  6. ml4t/diagnostic/caching/__init__.py +40 -0
  7. ml4t/diagnostic/caching/cache.py +331 -0
  8. ml4t/diagnostic/caching/decorators.py +131 -0
  9. ml4t/diagnostic/caching/smart_cache.py +339 -0
  10. ml4t/diagnostic/config/AGENT.md +24 -0
  11. ml4t/diagnostic/config/README.md +267 -0
  12. ml4t/diagnostic/config/__init__.py +219 -0
  13. ml4t/diagnostic/config/barrier_config.py +277 -0
  14. ml4t/diagnostic/config/base.py +301 -0
  15. ml4t/diagnostic/config/event_config.py +148 -0
  16. ml4t/diagnostic/config/feature_config.py +404 -0
  17. ml4t/diagnostic/config/multi_signal_config.py +55 -0
  18. ml4t/diagnostic/config/portfolio_config.py +215 -0
  19. ml4t/diagnostic/config/report_config.py +391 -0
  20. ml4t/diagnostic/config/sharpe_config.py +202 -0
  21. ml4t/diagnostic/config/signal_config.py +206 -0
  22. ml4t/diagnostic/config/trade_analysis_config.py +310 -0
  23. ml4t/diagnostic/config/validation.py +279 -0
  24. ml4t/diagnostic/core/__init__.py +29 -0
  25. ml4t/diagnostic/core/numba_utils.py +315 -0
  26. ml4t/diagnostic/core/purging.py +372 -0
  27. ml4t/diagnostic/core/sampling.py +471 -0
  28. ml4t/diagnostic/errors/__init__.py +205 -0
  29. ml4t/diagnostic/evaluation/AGENT.md +26 -0
  30. ml4t/diagnostic/evaluation/__init__.py +437 -0
  31. ml4t/diagnostic/evaluation/autocorrelation.py +531 -0
  32. ml4t/diagnostic/evaluation/barrier_analysis.py +1050 -0
  33. ml4t/diagnostic/evaluation/binary_metrics.py +910 -0
  34. ml4t/diagnostic/evaluation/dashboard.py +715 -0
  35. ml4t/diagnostic/evaluation/diagnostic_plots.py +1037 -0
  36. ml4t/diagnostic/evaluation/distribution/__init__.py +499 -0
  37. ml4t/diagnostic/evaluation/distribution/moments.py +299 -0
  38. ml4t/diagnostic/evaluation/distribution/tails.py +777 -0
  39. ml4t/diagnostic/evaluation/distribution/tests.py +470 -0
  40. ml4t/diagnostic/evaluation/drift/__init__.py +139 -0
  41. ml4t/diagnostic/evaluation/drift/analysis.py +432 -0
  42. ml4t/diagnostic/evaluation/drift/domain_classifier.py +517 -0
  43. ml4t/diagnostic/evaluation/drift/population_stability_index.py +310 -0
  44. ml4t/diagnostic/evaluation/drift/wasserstein.py +388 -0
  45. ml4t/diagnostic/evaluation/event_analysis.py +647 -0
  46. ml4t/diagnostic/evaluation/excursion.py +390 -0
  47. ml4t/diagnostic/evaluation/feature_diagnostics.py +873 -0
  48. ml4t/diagnostic/evaluation/feature_outcome.py +666 -0
  49. ml4t/diagnostic/evaluation/framework.py +935 -0
  50. ml4t/diagnostic/evaluation/metric_registry.py +255 -0
  51. ml4t/diagnostic/evaluation/metrics/AGENT.md +23 -0
  52. ml4t/diagnostic/evaluation/metrics/__init__.py +133 -0
  53. ml4t/diagnostic/evaluation/metrics/basic.py +160 -0
  54. ml4t/diagnostic/evaluation/metrics/conditional_ic.py +469 -0
  55. ml4t/diagnostic/evaluation/metrics/feature_outcome.py +475 -0
  56. ml4t/diagnostic/evaluation/metrics/ic_statistics.py +446 -0
  57. ml4t/diagnostic/evaluation/metrics/importance_analysis.py +338 -0
  58. ml4t/diagnostic/evaluation/metrics/importance_classical.py +375 -0
  59. ml4t/diagnostic/evaluation/metrics/importance_mda.py +371 -0
  60. ml4t/diagnostic/evaluation/metrics/importance_shap.py +715 -0
  61. ml4t/diagnostic/evaluation/metrics/information_coefficient.py +527 -0
  62. ml4t/diagnostic/evaluation/metrics/interactions.py +772 -0
  63. ml4t/diagnostic/evaluation/metrics/monotonicity.py +226 -0
  64. ml4t/diagnostic/evaluation/metrics/risk_adjusted.py +324 -0
  65. ml4t/diagnostic/evaluation/multi_signal.py +550 -0
  66. ml4t/diagnostic/evaluation/portfolio_analysis/__init__.py +83 -0
  67. ml4t/diagnostic/evaluation/portfolio_analysis/analysis.py +734 -0
  68. ml4t/diagnostic/evaluation/portfolio_analysis/metrics.py +589 -0
  69. ml4t/diagnostic/evaluation/portfolio_analysis/results.py +334 -0
  70. ml4t/diagnostic/evaluation/report_generation.py +824 -0
  71. ml4t/diagnostic/evaluation/signal_selector.py +452 -0
  72. ml4t/diagnostic/evaluation/stat_registry.py +139 -0
  73. ml4t/diagnostic/evaluation/stationarity/__init__.py +97 -0
  74. ml4t/diagnostic/evaluation/stationarity/analysis.py +518 -0
  75. ml4t/diagnostic/evaluation/stationarity/augmented_dickey_fuller.py +296 -0
  76. ml4t/diagnostic/evaluation/stationarity/kpss_test.py +308 -0
  77. ml4t/diagnostic/evaluation/stationarity/phillips_perron.py +365 -0
  78. ml4t/diagnostic/evaluation/stats/AGENT.md +43 -0
  79. ml4t/diagnostic/evaluation/stats/__init__.py +191 -0
  80. ml4t/diagnostic/evaluation/stats/backtest_overfitting.py +219 -0
  81. ml4t/diagnostic/evaluation/stats/bootstrap.py +228 -0
  82. ml4t/diagnostic/evaluation/stats/deflated_sharpe_ratio.py +591 -0
  83. ml4t/diagnostic/evaluation/stats/false_discovery_rate.py +295 -0
  84. ml4t/diagnostic/evaluation/stats/hac_standard_errors.py +108 -0
  85. ml4t/diagnostic/evaluation/stats/minimum_track_record.py +408 -0
  86. ml4t/diagnostic/evaluation/stats/moments.py +164 -0
  87. ml4t/diagnostic/evaluation/stats/rademacher_adjustment.py +436 -0
  88. ml4t/diagnostic/evaluation/stats/reality_check.py +155 -0
  89. ml4t/diagnostic/evaluation/stats/sharpe_inference.py +219 -0
  90. ml4t/diagnostic/evaluation/themes.py +330 -0
  91. ml4t/diagnostic/evaluation/threshold_analysis.py +957 -0
  92. ml4t/diagnostic/evaluation/trade_analysis.py +1136 -0
  93. ml4t/diagnostic/evaluation/trade_dashboard/__init__.py +32 -0
  94. ml4t/diagnostic/evaluation/trade_dashboard/app.py +315 -0
  95. ml4t/diagnostic/evaluation/trade_dashboard/export/__init__.py +18 -0
  96. ml4t/diagnostic/evaluation/trade_dashboard/export/csv.py +82 -0
  97. ml4t/diagnostic/evaluation/trade_dashboard/export/html.py +276 -0
  98. ml4t/diagnostic/evaluation/trade_dashboard/io.py +166 -0
  99. ml4t/diagnostic/evaluation/trade_dashboard/normalize.py +304 -0
  100. ml4t/diagnostic/evaluation/trade_dashboard/stats.py +386 -0
  101. ml4t/diagnostic/evaluation/trade_dashboard/style.py +79 -0
  102. ml4t/diagnostic/evaluation/trade_dashboard/tabs/__init__.py +21 -0
  103. ml4t/diagnostic/evaluation/trade_dashboard/tabs/patterns.py +354 -0
  104. ml4t/diagnostic/evaluation/trade_dashboard/tabs/shap_analysis.py +280 -0
  105. ml4t/diagnostic/evaluation/trade_dashboard/tabs/stat_validation.py +186 -0
  106. ml4t/diagnostic/evaluation/trade_dashboard/tabs/worst_trades.py +236 -0
  107. ml4t/diagnostic/evaluation/trade_dashboard/types.py +129 -0
  108. ml4t/diagnostic/evaluation/trade_shap/__init__.py +102 -0
  109. ml4t/diagnostic/evaluation/trade_shap/alignment.py +188 -0
  110. ml4t/diagnostic/evaluation/trade_shap/characterize.py +413 -0
  111. ml4t/diagnostic/evaluation/trade_shap/cluster.py +302 -0
  112. ml4t/diagnostic/evaluation/trade_shap/explain.py +208 -0
  113. ml4t/diagnostic/evaluation/trade_shap/hypotheses/__init__.py +23 -0
  114. ml4t/diagnostic/evaluation/trade_shap/hypotheses/generator.py +290 -0
  115. ml4t/diagnostic/evaluation/trade_shap/hypotheses/matcher.py +251 -0
  116. ml4t/diagnostic/evaluation/trade_shap/hypotheses/templates.yaml +467 -0
  117. ml4t/diagnostic/evaluation/trade_shap/models.py +386 -0
  118. ml4t/diagnostic/evaluation/trade_shap/normalize.py +116 -0
  119. ml4t/diagnostic/evaluation/trade_shap/pipeline.py +263 -0
  120. ml4t/diagnostic/evaluation/trade_shap_dashboard.py +283 -0
  121. ml4t/diagnostic/evaluation/trade_shap_diagnostics.py +588 -0
  122. ml4t/diagnostic/evaluation/validated_cv.py +535 -0
  123. ml4t/diagnostic/evaluation/visualization.py +1050 -0
  124. ml4t/diagnostic/evaluation/volatility/__init__.py +45 -0
  125. ml4t/diagnostic/evaluation/volatility/analysis.py +351 -0
  126. ml4t/diagnostic/evaluation/volatility/arch.py +258 -0
  127. ml4t/diagnostic/evaluation/volatility/garch.py +460 -0
  128. ml4t/diagnostic/integration/__init__.py +48 -0
  129. ml4t/diagnostic/integration/backtest_contract.py +671 -0
  130. ml4t/diagnostic/integration/data_contract.py +316 -0
  131. ml4t/diagnostic/integration/engineer_contract.py +226 -0
  132. ml4t/diagnostic/logging/__init__.py +77 -0
  133. ml4t/diagnostic/logging/logger.py +245 -0
  134. ml4t/diagnostic/logging/performance.py +234 -0
  135. ml4t/diagnostic/logging/progress.py +234 -0
  136. ml4t/diagnostic/logging/wandb.py +412 -0
  137. ml4t/diagnostic/metrics/__init__.py +9 -0
  138. ml4t/diagnostic/metrics/percentiles.py +128 -0
  139. ml4t/diagnostic/py.typed +1 -0
  140. ml4t/diagnostic/reporting/__init__.py +43 -0
  141. ml4t/diagnostic/reporting/base.py +130 -0
  142. ml4t/diagnostic/reporting/html_renderer.py +275 -0
  143. ml4t/diagnostic/reporting/json_renderer.py +51 -0
  144. ml4t/diagnostic/reporting/markdown_renderer.py +117 -0
  145. ml4t/diagnostic/results/AGENT.md +24 -0
  146. ml4t/diagnostic/results/__init__.py +105 -0
  147. ml4t/diagnostic/results/barrier_results/__init__.py +36 -0
  148. ml4t/diagnostic/results/barrier_results/hit_rate.py +304 -0
  149. ml4t/diagnostic/results/barrier_results/precision_recall.py +266 -0
  150. ml4t/diagnostic/results/barrier_results/profit_factor.py +297 -0
  151. ml4t/diagnostic/results/barrier_results/tearsheet.py +397 -0
  152. ml4t/diagnostic/results/barrier_results/time_to_target.py +305 -0
  153. ml4t/diagnostic/results/barrier_results/validation.py +38 -0
  154. ml4t/diagnostic/results/base.py +177 -0
  155. ml4t/diagnostic/results/event_results.py +349 -0
  156. ml4t/diagnostic/results/feature_results.py +787 -0
  157. ml4t/diagnostic/results/multi_signal_results.py +431 -0
  158. ml4t/diagnostic/results/portfolio_results.py +281 -0
  159. ml4t/diagnostic/results/sharpe_results.py +448 -0
  160. ml4t/diagnostic/results/signal_results/__init__.py +74 -0
  161. ml4t/diagnostic/results/signal_results/ic.py +581 -0
  162. ml4t/diagnostic/results/signal_results/irtc.py +110 -0
  163. ml4t/diagnostic/results/signal_results/quantile.py +392 -0
  164. ml4t/diagnostic/results/signal_results/tearsheet.py +456 -0
  165. ml4t/diagnostic/results/signal_results/turnover.py +213 -0
  166. ml4t/diagnostic/results/signal_results/validation.py +147 -0
  167. ml4t/diagnostic/signal/AGENT.md +17 -0
  168. ml4t/diagnostic/signal/__init__.py +69 -0
  169. ml4t/diagnostic/signal/_report.py +152 -0
  170. ml4t/diagnostic/signal/_utils.py +261 -0
  171. ml4t/diagnostic/signal/core.py +275 -0
  172. ml4t/diagnostic/signal/quantile.py +148 -0
  173. ml4t/diagnostic/signal/result.py +214 -0
  174. ml4t/diagnostic/signal/signal_ic.py +129 -0
  175. ml4t/diagnostic/signal/turnover.py +182 -0
  176. ml4t/diagnostic/splitters/AGENT.md +19 -0
  177. ml4t/diagnostic/splitters/__init__.py +36 -0
  178. ml4t/diagnostic/splitters/base.py +501 -0
  179. ml4t/diagnostic/splitters/calendar.py +421 -0
  180. ml4t/diagnostic/splitters/calendar_config.py +91 -0
  181. ml4t/diagnostic/splitters/combinatorial.py +1064 -0
  182. ml4t/diagnostic/splitters/config.py +322 -0
  183. ml4t/diagnostic/splitters/cpcv/__init__.py +57 -0
  184. ml4t/diagnostic/splitters/cpcv/combinations.py +119 -0
  185. ml4t/diagnostic/splitters/cpcv/partitioning.py +263 -0
  186. ml4t/diagnostic/splitters/cpcv/purge_engine.py +379 -0
  187. ml4t/diagnostic/splitters/cpcv/windows.py +190 -0
  188. ml4t/diagnostic/splitters/group_isolation.py +329 -0
  189. ml4t/diagnostic/splitters/persistence.py +316 -0
  190. ml4t/diagnostic/splitters/utils.py +207 -0
  191. ml4t/diagnostic/splitters/walk_forward.py +757 -0
  192. ml4t/diagnostic/utils/__init__.py +42 -0
  193. ml4t/diagnostic/utils/config.py +542 -0
  194. ml4t/diagnostic/utils/dependencies.py +318 -0
  195. ml4t/diagnostic/utils/sessions.py +127 -0
  196. ml4t/diagnostic/validation/__init__.py +54 -0
  197. ml4t/diagnostic/validation/dataframe.py +274 -0
  198. ml4t/diagnostic/validation/returns.py +280 -0
  199. ml4t/diagnostic/validation/timeseries.py +299 -0
  200. ml4t/diagnostic/visualization/AGENT.md +19 -0
  201. ml4t/diagnostic/visualization/__init__.py +223 -0
  202. ml4t/diagnostic/visualization/backtest/__init__.py +98 -0
  203. ml4t/diagnostic/visualization/backtest/cost_attribution.py +762 -0
  204. ml4t/diagnostic/visualization/backtest/executive_summary.py +895 -0
  205. ml4t/diagnostic/visualization/backtest/interactive_controls.py +673 -0
  206. ml4t/diagnostic/visualization/backtest/statistical_validity.py +874 -0
  207. ml4t/diagnostic/visualization/backtest/tearsheet.py +565 -0
  208. ml4t/diagnostic/visualization/backtest/template_system.py +373 -0
  209. ml4t/diagnostic/visualization/backtest/trade_plots.py +1172 -0
  210. ml4t/diagnostic/visualization/barrier_plots.py +782 -0
  211. ml4t/diagnostic/visualization/core.py +1060 -0
  212. ml4t/diagnostic/visualization/dashboards/__init__.py +36 -0
  213. ml4t/diagnostic/visualization/dashboards/base.py +582 -0
  214. ml4t/diagnostic/visualization/dashboards/importance.py +801 -0
  215. ml4t/diagnostic/visualization/dashboards/interaction.py +263 -0
  216. ml4t/diagnostic/visualization/dashboards.py +43 -0
  217. ml4t/diagnostic/visualization/data_extraction/__init__.py +48 -0
  218. ml4t/diagnostic/visualization/data_extraction/importance.py +649 -0
  219. ml4t/diagnostic/visualization/data_extraction/interaction.py +504 -0
  220. ml4t/diagnostic/visualization/data_extraction/types.py +113 -0
  221. ml4t/diagnostic/visualization/data_extraction/validation.py +66 -0
  222. ml4t/diagnostic/visualization/feature_plots.py +888 -0
  223. ml4t/diagnostic/visualization/interaction_plots.py +618 -0
  224. ml4t/diagnostic/visualization/portfolio/__init__.py +41 -0
  225. ml4t/diagnostic/visualization/portfolio/dashboard.py +514 -0
  226. ml4t/diagnostic/visualization/portfolio/drawdown_plots.py +341 -0
  227. ml4t/diagnostic/visualization/portfolio/returns_plots.py +487 -0
  228. ml4t/diagnostic/visualization/portfolio/risk_plots.py +301 -0
  229. ml4t/diagnostic/visualization/report_generation.py +1343 -0
  230. ml4t/diagnostic/visualization/signal/__init__.py +103 -0
  231. ml4t/diagnostic/visualization/signal/dashboard.py +911 -0
  232. ml4t/diagnostic/visualization/signal/event_plots.py +514 -0
  233. ml4t/diagnostic/visualization/signal/ic_plots.py +635 -0
  234. ml4t/diagnostic/visualization/signal/multi_signal_dashboard.py +974 -0
  235. ml4t/diagnostic/visualization/signal/multi_signal_plots.py +603 -0
  236. ml4t/diagnostic/visualization/signal/quantile_plots.py +625 -0
  237. ml4t/diagnostic/visualization/signal/turnover_plots.py +400 -0
  238. ml4t/diagnostic/visualization/trade_shap/__init__.py +90 -0
  239. ml4t_diagnostic-0.1.0a1.dist-info/METADATA +1044 -0
  240. ml4t_diagnostic-0.1.0a1.dist-info/RECORD +242 -0
  241. ml4t_diagnostic-0.1.0a1.dist-info/WHEEL +4 -0
  242. ml4t_diagnostic-0.1.0a1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,873 @@
1
+ """Feature-level diagnostic analysis for quantitative trading signals.
2
+
3
+ This module provides the main API for comprehensive feature diagnostic testing,
4
+ orchestrating all Module A diagnostic capabilities:
5
+
6
+ - Stationarity analysis (ADF, KPSS, PP tests)
7
+ - Autocorrelation analysis (ACF, PACF)
8
+ - Volatility clustering (ARCH-LM, GARCH)
9
+ - Distribution diagnostics (moments, normality, heavy tails)
10
+
11
+ The FeatureDiagnostics class provides a unified interface for running all
12
+ diagnostic tests on trading features/signals, with configurable test selection
13
+ and batch processing capabilities.
14
+
15
+ Key Concept:
16
+ Before using features in ML models or calculating feature-outcome relationships,
17
+ you must understand their statistical properties. FeatureDiagnostics provides
18
+ a comprehensive health check of feature quality.
19
+
20
+ Typical Workflow:
21
+ 1. Create FeatureDiagnosticsConfig specifying which tests to run
22
+ 2. Initialize FeatureDiagnostics with config
23
+ 3. Call run_diagnostics() on feature time series
24
+ 4. Review FeatureDiagnosticsResult for insights
25
+ 5. Transform features based on diagnostic results
26
+ 6. Re-run diagnostics on transformed features
27
+
28
+ Example:
29
+ >>> import numpy as np
30
+ >>> from ml4t.diagnostic.evaluation.feature_diagnostics import (
31
+ ... FeatureDiagnostics,
32
+ ... FeatureDiagnosticsConfig
33
+ ... )
34
+ >>>
35
+ >>> # Create feature (e.g., returns signal)
36
+ >>> feature = np.random.randn(1000) * 0.02 # ~2% volatility white noise
37
+ >>>
38
+ >>> # Configure diagnostics
39
+ >>> config = FeatureDiagnosticsConfig(
40
+ ... run_stationarity=True,
41
+ ... run_autocorrelation=True,
42
+ ... run_volatility=True,
43
+ ... run_distribution=True
44
+ ... )
45
+ >>>
46
+ >>> # Run diagnostics
47
+ >>> diagnostics = FeatureDiagnostics(config)
48
+ >>> result = diagnostics.run_diagnostics(feature, name="momentum_signal")
49
+ >>>
50
+ >>> # Review results
51
+ >>> print(result.summary())
52
+ >>> print(result.summary_df)
53
+ >>>
54
+ >>> # Check specific properties
55
+ >>> if result.stationarity.consensus == "strong_stationary":
56
+ ... print("Feature is stationary - safe to use directly")
57
+ >>> if result.volatility.has_clustering:
58
+ ... print("Feature has volatility clustering - consider GARCH modeling")
59
+ """
60
+
61
+ from __future__ import annotations
62
+
63
+ from dataclasses import dataclass, field
64
+ from typing import Literal
65
+
66
+ import numpy as np
67
+ import pandas as pd
68
+
69
+ from ml4t.diagnostic.errors import ValidationError
70
+ from ml4t.diagnostic.logging import get_logger
71
+
72
+ # Import all diagnostic modules
73
+ from .autocorrelation import AutocorrelationAnalysisResult, analyze_autocorrelation
74
+ from .distribution import DistributionAnalysisResult, analyze_distribution
75
+ from .stationarity import StationarityAnalysisResult, analyze_stationarity
76
+ from .volatility import VolatilityAnalysisResult, analyze_volatility
77
+
78
+ logger = get_logger(__name__)
79
+
80
+
81
@dataclass
class FeatureDiagnosticsConfig:
    """Configuration for feature diagnostic analysis.

    Controls which diagnostic tests are run and their parameters.

    Attributes:
        run_stationarity: Whether to run stationarity tests (ADF, KPSS, PP)
        run_autocorrelation: Whether to run autocorrelation analysis (ACF, PACF)
        run_volatility: Whether to run volatility clustering tests (ARCH-LM, GARCH)
        run_distribution: Whether to run distribution diagnostics (moments, normality, tails)
        alpha: Significance level for statistical tests (default: 0.05)
        stationarity_tests: Which stationarity tests to run (None = run all of
            "adf", "kpss", "pp")
        max_acf_lags: Maximum lags for ACF/PACF (None = auto-determine)
        arch_lags: Number of lags for the ARCH-LM test (default: 12)
        fit_garch: Whether to fit a GARCH model when ARCH effects are detected
        compute_tails: Whether to perform heavy tail analysis
        verbose: Whether to log detailed progress information
    """

    run_stationarity: bool = True
    run_autocorrelation: bool = True
    run_volatility: bool = True
    run_distribution: bool = True

    # Significance level shared by all statistical tests.
    alpha: float = 0.05

    # Stationarity options
    stationarity_tests: list[Literal["adf", "kpss", "pp"]] | None = None

    # Autocorrelation options
    max_acf_lags: int | None = None

    # Volatility options
    arch_lags: int = 12
    fit_garch: bool = True

    # Distribution options
    compute_tails: bool = True

    # Logging
    verbose: bool = False

    def __post_init__(self):
        """Validate configuration.

        Raises:
            ValidationError: If ``alpha`` is outside (0, 1), or if every
                diagnostic module is disabled (a run would do nothing).
        """
        if not (0 < self.alpha < 1):
            raise ValidationError(f"alpha must be in (0, 1), got {self.alpha}")

        if not any(
            [
                self.run_stationarity,
                self.run_autocorrelation,
                self.run_volatility,
                self.run_distribution,
            ]
        ):
            raise ValidationError("At least one diagnostic module must be enabled")
139
+
140
+
141
+ @dataclass
142
+ class FeatureDiagnosticsResult:
143
+ """Results from comprehensive feature diagnostic analysis.
144
+
145
+ Aggregates results from all diagnostic modules with high-level summary.
146
+
147
+ Attributes:
148
+ feature_name: Name/identifier for the feature
149
+ n_obs: Number of observations in feature
150
+ stationarity: Stationarity analysis result (None if not run)
151
+ autocorrelation: Autocorrelation analysis result (None if not run)
152
+ volatility: Volatility clustering analysis result (None if not run)
153
+ distribution: Distribution diagnostics result (None if not run)
154
+ summary_df: DataFrame summarizing all test results
155
+ recommendations: List of recommendations based on diagnostic results
156
+ health_score: Overall feature health score (0.0 to 1.0)
157
+ flags: List of warning flags raised by diagnostics
158
+ """
159
+
160
+ feature_name: str
161
+ n_obs: int
162
+
163
+ stationarity: StationarityAnalysisResult | None = None
164
+ autocorrelation: AutocorrelationAnalysisResult | None = None
165
+ volatility: VolatilityAnalysisResult | None = None
166
+ distribution: DistributionAnalysisResult | None = None
167
+
168
+ summary_df: pd.DataFrame = field(default_factory=pd.DataFrame)
169
+ recommendations: list[str] = field(default_factory=list)
170
+ health_score: float = 0.0
171
+ flags: list[str] = field(default_factory=list)
172
+
173
+ def __post_init__(self):
174
+ """Calculate derived fields after initialization."""
175
+ if self.summary_df.empty:
176
+ self.summary_df = self._create_summary_df()
177
+
178
+ if not self.recommendations:
179
+ self.recommendations = self._generate_recommendations()
180
+
181
+ if self.health_score == 0.0:
182
+ self.health_score = self._calculate_health_score()
183
+
184
+ if not self.flags:
185
+ self.flags = self._identify_flags()
186
+
187
+ def _create_summary_df(self) -> pd.DataFrame:
188
+ """Create summary DataFrame from all diagnostic results.
189
+
190
+ Returns:
191
+ DataFrame with one row per test showing key statistics
192
+ """
193
+ rows = []
194
+
195
+ # Stationarity tests
196
+ if self.stationarity is not None:
197
+ if self.stationarity.adf_result is not None:
198
+ rows.append(
199
+ {
200
+ "Module": "Stationarity",
201
+ "Test": "ADF",
202
+ "Statistic": self.stationarity.adf_result.test_statistic,
203
+ "P-Value": self.stationarity.adf_result.p_value,
204
+ "Result": (
205
+ "Stationary"
206
+ if self.stationarity.adf_result.is_stationary
207
+ else "Non-stationary"
208
+ ),
209
+ }
210
+ )
211
+
212
+ if self.stationarity.kpss_result is not None:
213
+ rows.append(
214
+ {
215
+ "Module": "Stationarity",
216
+ "Test": "KPSS",
217
+ "Statistic": self.stationarity.kpss_result.test_statistic,
218
+ "P-Value": self.stationarity.kpss_result.p_value,
219
+ "Result": (
220
+ "Stationary"
221
+ if self.stationarity.kpss_result.is_stationary
222
+ else "Non-stationary"
223
+ ),
224
+ }
225
+ )
226
+
227
+ if self.stationarity.pp_result is not None:
228
+ rows.append(
229
+ {
230
+ "Module": "Stationarity",
231
+ "Test": "PP",
232
+ "Statistic": self.stationarity.pp_result.test_statistic,
233
+ "P-Value": self.stationarity.pp_result.p_value,
234
+ "Result": (
235
+ "Stationary"
236
+ if self.stationarity.pp_result.is_stationary
237
+ else "Non-stationary"
238
+ ),
239
+ }
240
+ )
241
+
242
+ # Add consensus row
243
+ rows.append(
244
+ {
245
+ "Module": "Stationarity",
246
+ "Test": "Consensus",
247
+ "Statistic": None,
248
+ "P-Value": None,
249
+ "Result": self.stationarity.consensus,
250
+ }
251
+ )
252
+
253
+ # Autocorrelation tests
254
+ if self.autocorrelation is not None:
255
+ n_significant_acf = len(self.autocorrelation.significant_acf_lags)
256
+ n_significant_pacf = len(self.autocorrelation.significant_pacf_lags)
257
+
258
+ rows.append(
259
+ {
260
+ "Module": "Autocorrelation",
261
+ "Test": "ACF",
262
+ "Statistic": None, # No single max ACF statistic
263
+ "P-Value": None,
264
+ "Result": f"{n_significant_acf} significant lags",
265
+ }
266
+ )
267
+
268
+ rows.append(
269
+ {
270
+ "Module": "Autocorrelation",
271
+ "Test": "PACF",
272
+ "Statistic": None, # No single max PACF statistic
273
+ "P-Value": None,
274
+ "Result": f"{n_significant_pacf} significant lags",
275
+ }
276
+ )
277
+
278
+ rows.append(
279
+ {
280
+ "Module": "Autocorrelation",
281
+ "Test": "Consensus",
282
+ "Statistic": None,
283
+ "P-Value": None,
284
+ "Result": (
285
+ "No autocorrelation"
286
+ if self.autocorrelation.is_white_noise
287
+ else "Has autocorrelation"
288
+ ),
289
+ }
290
+ )
291
+
292
+ # Volatility tests
293
+ if self.volatility is not None:
294
+ rows.append(
295
+ {
296
+ "Module": "Volatility",
297
+ "Test": "ARCH-LM",
298
+ "Statistic": self.volatility.arch_lm_result.test_statistic,
299
+ "P-Value": self.volatility.arch_lm_result.p_value,
300
+ "Result": (
301
+ "ARCH effects"
302
+ if self.volatility.arch_lm_result.has_arch_effects
303
+ else "No ARCH effects"
304
+ ),
305
+ }
306
+ )
307
+
308
+ if self.volatility.garch_result is not None:
309
+ # Note: Currently always GARCH(1,1) - p and q not stored in result
310
+ rows.append(
311
+ {
312
+ "Module": "Volatility",
313
+ "Test": "GARCH",
314
+ "Statistic": None,
315
+ "P-Value": None,
316
+ "Result": "GARCH(1,1) fit",
317
+ }
318
+ )
319
+
320
+ rows.append(
321
+ {
322
+ "Module": "Volatility",
323
+ "Test": "Consensus",
324
+ "Statistic": None,
325
+ "P-Value": None,
326
+ "Result": (
327
+ "Has clustering"
328
+ if self.volatility.has_volatility_clustering
329
+ else "No clustering"
330
+ ),
331
+ }
332
+ )
333
+
334
+ # Distribution tests
335
+ if self.distribution is not None:
336
+ if self.distribution.moments_result is not None:
337
+ rows.append(
338
+ {
339
+ "Module": "Distribution",
340
+ "Test": "Skewness",
341
+ "Statistic": self.distribution.moments_result.skewness,
342
+ "P-Value": None,
343
+ "Result": (
344
+ "Significant"
345
+ if self.distribution.moments_result.skewness_significant
346
+ else "Not significant"
347
+ ),
348
+ }
349
+ )
350
+
351
+ rows.append(
352
+ {
353
+ "Module": "Distribution",
354
+ "Test": "Excess Kurtosis",
355
+ "Statistic": self.distribution.moments_result.excess_kurtosis,
356
+ "P-Value": None,
357
+ "Result": (
358
+ "Significant"
359
+ if self.distribution.moments_result.excess_kurtosis_significant
360
+ else "Not significant"
361
+ ),
362
+ }
363
+ )
364
+
365
+ if self.distribution.jarque_bera_result is not None:
366
+ rows.append(
367
+ {
368
+ "Module": "Distribution",
369
+ "Test": "Jarque-Bera",
370
+ "Statistic": self.distribution.jarque_bera_result.statistic,
371
+ "P-Value": self.distribution.jarque_bera_result.p_value,
372
+ "Result": (
373
+ "Normal"
374
+ if self.distribution.jarque_bera_result.is_normal
375
+ else "Not normal"
376
+ ),
377
+ }
378
+ )
379
+
380
+ if self.distribution.shapiro_wilk_result is not None:
381
+ rows.append(
382
+ {
383
+ "Module": "Distribution",
384
+ "Test": "Shapiro-Wilk",
385
+ "Statistic": self.distribution.shapiro_wilk_result.statistic,
386
+ "P-Value": self.distribution.shapiro_wilk_result.p_value,
387
+ "Result": (
388
+ "Normal"
389
+ if self.distribution.shapiro_wilk_result.is_normal
390
+ else "Not normal"
391
+ ),
392
+ }
393
+ )
394
+
395
+ if (
396
+ self.distribution.tail_analysis_result is not None
397
+ and self.distribution.tail_analysis_result.hill_result is not None
398
+ ):
399
+ rows.append(
400
+ {
401
+ "Module": "Distribution",
402
+ "Test": "Hill Estimator",
403
+ "Statistic": self.distribution.tail_analysis_result.hill_result.tail_index,
404
+ "P-Value": None,
405
+ "Result": self.distribution.tail_analysis_result.hill_result.classification.replace(
406
+ "_", " "
407
+ ).title(),
408
+ }
409
+ )
410
+
411
+ rows.append(
412
+ {
413
+ "Module": "Distribution",
414
+ "Test": "Recommended",
415
+ "Statistic": None,
416
+ "P-Value": None,
417
+ "Result": self.distribution.recommended_distribution,
418
+ }
419
+ )
420
+
421
+ return pd.DataFrame(rows)
422
+
423
+ def _generate_recommendations(self) -> list[str]:
424
+ """Generate actionable recommendations based on diagnostic results.
425
+
426
+ Returns:
427
+ List of recommendation strings
428
+ """
429
+ recommendations = []
430
+
431
+ # Stationarity recommendations
432
+ if self.stationarity is not None:
433
+ if self.stationarity.consensus in ["strong_nonstationary", "likely_nonstationary"]:
434
+ recommendations.append(
435
+ "Feature is non-stationary. Consider differencing or detrending before use."
436
+ )
437
+ elif self.stationarity.consensus == "inconclusive":
438
+ recommendations.append(
439
+ "Stationarity tests are inconclusive. Try longer time series or alternative transformations."
440
+ )
441
+
442
+ # Autocorrelation recommendations
443
+ if self.autocorrelation is not None and not self.autocorrelation.is_white_noise:
444
+ max_lag = max(
445
+ self.autocorrelation.significant_acf_lags
446
+ + self.autocorrelation.significant_pacf_lags,
447
+ default=0,
448
+ )
449
+ recommendations.append(
450
+ f"Feature has significant autocorrelation up to lag {max_lag}. "
451
+ "Consider AR/MA modeling or including lagged values as features."
452
+ )
453
+
454
+ # Volatility recommendations
455
+ if self.volatility is not None and self.volatility.has_volatility_clustering:
456
+ if self.volatility.garch_result is not None:
457
+ recommendations.append(
458
+ "Feature exhibits volatility clustering. GARCH(1,1) "
459
+ "model provides good fit. Consider using conditional volatility."
460
+ )
461
+ else:
462
+ recommendations.append(
463
+ "Feature exhibits volatility clustering (ARCH effects). "
464
+ "Consider GARCH modeling or volatility-adjusted features."
465
+ )
466
+
467
+ # Distribution recommendations
468
+ if self.distribution is not None:
469
+ rec_dist = self.distribution.recommended_distribution
470
+
471
+ if rec_dist != "normal":
472
+ recommendations.append(
473
+ f"Feature distribution is not normal (recommended: {rec_dist}). "
474
+ "Consider robust statistics or distribution-specific modeling."
475
+ )
476
+
477
+ if (
478
+ self.distribution.tail_analysis_result is not None
479
+ and self.distribution.tail_analysis_result.hill_result is not None
480
+ ):
481
+ tail_index = self.distribution.tail_analysis_result.hill_result.tail_index
482
+
483
+ if tail_index <= 2:
484
+ recommendations.append(
485
+ f"Feature has very heavy tails (α={tail_index:.2f}, variance may not exist). "
486
+ "Use robust statistics and be cautious with moment-based methods."
487
+ )
488
+ elif tail_index <= 4:
489
+ recommendations.append(
490
+ f"Feature has heavy tails (α={tail_index:.2f}, kurtosis may not exist). "
491
+ "Consider Student-t or stable distributions."
492
+ )
493
+
494
+ # General recommendations
495
+ if not recommendations:
496
+ recommendations.append(
497
+ "Feature passes all diagnostic checks. Safe to use in modeling without transformation."
498
+ )
499
+
500
+ return recommendations
501
+
502
+ def _calculate_health_score(self) -> float:
503
+ """Calculate overall feature health score (0.0 to 1.0).
504
+
505
+ Combines results from all modules into single score.
506
+ Higher score = better feature quality.
507
+
508
+ Returns:
509
+ Health score from 0.0 (poor) to 1.0 (excellent)
510
+ """
511
+ score = 0.0
512
+ max_score = 0.0
513
+
514
+ # Stationarity contribution (0.25 weight)
515
+ if self.stationarity is not None:
516
+ max_score += 0.25
517
+ if self.stationarity.consensus == "strong_stationary":
518
+ score += 0.25
519
+ elif self.stationarity.consensus == "likely_stationary":
520
+ score += 0.20
521
+ elif self.stationarity.consensus == "inconclusive":
522
+ score += 0.10
523
+
524
+ # Autocorrelation contribution (0.25 weight)
525
+ # Note: Having some autocorrelation is OK, extreme is bad
526
+ if self.autocorrelation is not None:
527
+ max_score += 0.25
528
+ n_significant_acf = len(self.autocorrelation.significant_acf_lags)
529
+ if n_significant_acf == 0:
530
+ score += 0.25 # No autocorrelation is good
531
+ elif n_significant_acf <= 5:
532
+ score += 0.20 # Some autocorrelation is manageable
533
+ elif n_significant_acf <= 10:
534
+ score += 0.10 # Moderate autocorrelation
535
+ # else: 0.0 for excessive autocorrelation
536
+
537
+ # Volatility contribution (0.25 weight)
538
+ # Note: Having ARCH effects is OK if we can model them
539
+ if self.volatility is not None:
540
+ max_score += 0.25
541
+ if not self.volatility.has_volatility_clustering:
542
+ score += 0.25 # No clustering is ideal
543
+ elif (
544
+ self.volatility.garch_result is not None and self.volatility.garch_result.converged
545
+ ):
546
+ score += 0.20 # Has clustering but GARCH fits well
547
+ else:
548
+ score += 0.10 # Has clustering but harder to model
549
+
550
+ # Distribution contribution (0.25 weight)
551
+ if self.distribution is not None:
552
+ max_score += 0.25
553
+ rec_dist = self.distribution.recommended_distribution
554
+
555
+ if rec_dist == "normal":
556
+ score += 0.25 # Normal is ideal
557
+ elif rec_dist in ["t", "heavy-tailed"]:
558
+ score += 0.15 # Heavy tails but manageable
559
+ elif rec_dist == "stable":
560
+ score += 0.05 # Extreme tails, difficult to work with
561
+ # else: "lognormal", "uniform" get default 0.0
562
+
563
+ # Normalize to 0.0-1.0 range
564
+ if max_score > 0:
565
+ return score / max_score
566
+ return 0.0
567
+
568
+ def _identify_flags(self) -> list[str]:
569
+ """Identify warning flags from diagnostic results.
570
+
571
+ Returns:
572
+ List of warning flag strings
573
+ """
574
+ flags = []
575
+
576
+ # Stationarity flags
577
+ if self.stationarity is not None:
578
+ if self.stationarity.consensus in ["strong_nonstationary", "likely_nonstationary"]:
579
+ flags.append("NON_STATIONARY")
580
+
581
+ # Autocorrelation flags
582
+ if self.autocorrelation is not None:
583
+ n_significant_acf = len(self.autocorrelation.significant_acf_lags)
584
+ if n_significant_acf > 10:
585
+ flags.append("EXCESSIVE_AUTOCORRELATION")
586
+
587
+ # Volatility flags
588
+ if self.volatility is not None:
589
+ if self.volatility.has_volatility_clustering:
590
+ flags.append("VOLATILITY_CLUSTERING")
591
+ if (
592
+ self.volatility.garch_result is not None
593
+ and not self.volatility.garch_result.converged
594
+ ):
595
+ flags.append("GARCH_NO_CONVERGENCE")
596
+
597
+ # Distribution flags
598
+ if self.distribution is not None:
599
+ if not self.distribution.is_normal:
600
+ flags.append("NON_NORMAL")
601
+
602
+ if (
603
+ self.distribution.tail_analysis_result is not None
604
+ and self.distribution.tail_analysis_result.hill_result is not None
605
+ ):
606
+ tail_index = self.distribution.tail_analysis_result.hill_result.tail_index
607
+ if tail_index <= 2:
608
+ flags.append("VERY_HEAVY_TAILS")
609
+ elif tail_index <= 4:
610
+ flags.append("HEAVY_TAILS")
611
+
612
+ if self.distribution.moments_result is not None:
613
+ if abs(self.distribution.moments_result.skewness) > 1:
614
+ flags.append("HIGH_SKEWNESS")
615
+ if self.distribution.moments_result.excess_kurtosis > 10:
616
+ flags.append("HIGH_EXCESS_KURTOSIS")
617
+
618
+ return flags
619
+
620
+ def summary(self) -> str:
621
+ """Generate human-readable summary of diagnostic results.
622
+
623
+ Returns:
624
+ Multi-line summary string
625
+ """
626
+ lines = []
627
+ lines.append(f"Feature Diagnostics: {self.feature_name}")
628
+ lines.append(f"Observations: {self.n_obs}")
629
+ lines.append(f"Health Score: {self.health_score:.2f}/1.00")
630
+ lines.append("")
631
+
632
+ # Module summaries
633
+ if self.stationarity is not None:
634
+ lines.append(f"Stationarity: {self.stationarity.consensus}")
635
+
636
+ if self.autocorrelation is not None:
637
+ n_significant = len(self.autocorrelation.significant_acf_lags)
638
+ lines.append(f"Autocorrelation: {n_significant} significant lags")
639
+
640
+ if self.volatility is not None:
641
+ vol_str = (
642
+ "Has clustering" if self.volatility.has_volatility_clustering else "No clustering"
643
+ )
644
+ lines.append(f"Volatility: {vol_str}")
645
+
646
+ if self.distribution is not None:
647
+ lines.append(f"Distribution: {self.distribution.recommended_distribution}")
648
+
649
+ lines.append("")
650
+
651
+ # Flags
652
+ if self.flags:
653
+ lines.append(f"Flags: {', '.join(self.flags)}")
654
+ lines.append("")
655
+
656
+ # Recommendations
657
+ lines.append("Recommendations:")
658
+ for i, rec in enumerate(self.recommendations, 1):
659
+ lines.append(f" {i}. {rec}")
660
+
661
+ return "\n".join(lines)
662
+
663
+
664
class FeatureDiagnostics:
    """Main API class for feature-level diagnostic analysis.

    Orchestrates all Module A diagnostic tests (stationarity, autocorrelation,
    volatility, distribution) with configurable options and batch processing.

    Example:
        >>> import numpy as np
        >>> from ml4t.diagnostic.evaluation.feature_diagnostics import (
        ...     FeatureDiagnostics,
        ...     FeatureDiagnosticsConfig
        ... )
        >>>
        >>> # Configure and run diagnostics
        >>> config = FeatureDiagnosticsConfig()
        >>> diagnostics = FeatureDiagnostics(config)
        >>>
        >>> feature = np.random.randn(1000)
        >>> result = diagnostics.run_diagnostics(feature, name="my_feature")
        >>>
        >>> print(result.summary())
        >>> print(f"Health Score: {result.health_score:.2f}")
    """

    def __init__(self, config: FeatureDiagnosticsConfig | None = None):
        """Initialize FeatureDiagnostics with configuration.

        Args:
            config: Configuration object. If None, uses defaults (all tests enabled).
        """
        self.config = config or FeatureDiagnosticsConfig()

    @staticmethod
    def _coerce_input(data):
        """Validate input and return (1-D ndarray, observation count).

        Args:
            data: Feature time series (1D np.ndarray or pd.Series)

        Returns:
            Tuple of (numpy array view of the data, number of observations)

        Raises:
            ValidationError: If data is not a 1-D Series/ndarray or is empty.
        """
        if isinstance(data, pd.Series):
            data_array = data.to_numpy()
            n_obs = len(data)
        elif isinstance(data, np.ndarray):
            if data.ndim != 1:
                raise ValidationError(f"Data must be 1-dimensional, got shape {data.shape}")
            data_array = data
            n_obs = len(data)
        else:
            raise ValidationError(
                f"Data must be pd.Series or np.ndarray, got {type(data).__name__}"
            )

        if n_obs == 0:
            raise ValidationError("Data must not be empty")

        return data_array, n_obs

    def _run_module(self, label, progress_msg, analyzer, *args, **kwargs):
        """Run one diagnostic module with verbose logging and error isolation.

        A failing module is logged as a warning and reported as None so the
        remaining modules still run (same best-effort behavior as before).

        Args:
            label: Human-readable module name used in the failure message
            progress_msg: Message logged before the run when verbose
            analyzer: Analysis callable to invoke
            *args: Positional arguments forwarded to ``analyzer``
            **kwargs: Keyword arguments forwarded to ``analyzer``

        Returns:
            The analyzer's result object, or None if it raised.
        """
        if self.config.verbose:
            logger.info(progress_msg)
        try:
            return analyzer(*args, **kwargs)
        except Exception as e:
            logger.warning(f"{label} analysis failed: {e}")
            return None

    def run_diagnostics(
        self,
        data: pd.Series | np.ndarray,
        name: str = "feature",
    ) -> FeatureDiagnosticsResult:
        """Run comprehensive diagnostic analysis on a single feature.

        Args:
            data: Feature time series (1D array or Series)
            name: Name/identifier for the feature

        Returns:
            FeatureDiagnosticsResult with all test results and recommendations

        Raises:
            ValidationError: If data is invalid (empty, wrong shape, etc.)

        Example:
            >>> import numpy as np
            >>> diagnostics = FeatureDiagnostics()
            >>> feature = np.random.randn(1000)
            >>> result = diagnostics.run_diagnostics(feature, name="returns")
            >>> print(result.summary())
        """
        data_array, n_obs = self._coerce_input(data)

        if self.config.verbose:
            logger.info(f"Running diagnostics on feature '{name}' ({n_obs} observations)")

        # Each module runs independently; a failure leaves its slot None.
        stationarity_result = None
        autocorrelation_result = None
        volatility_result = None
        distribution_result = None

        if self.config.run_stationarity:
            stationarity_result = self._run_module(
                "Stationarity",
                "  Running stationarity tests...",
                analyze_stationarity,
                data_array,
                alpha=self.config.alpha,
                include_tests=self.config.stationarity_tests,
            )

        if self.config.run_autocorrelation:
            autocorrelation_result = self._run_module(
                "Autocorrelation",
                "  Running autocorrelation analysis...",
                analyze_autocorrelation,
                data_array,
                alpha=self.config.alpha,
                max_lags=self.config.max_acf_lags,
            )

        if self.config.run_volatility:
            volatility_result = self._run_module(
                "Volatility",
                "  Running volatility clustering tests...",
                analyze_volatility,
                data_array,
                arch_lags=self.config.arch_lags,
                fit_garch_model=self.config.fit_garch,
                alpha=self.config.alpha,
            )

        if self.config.run_distribution:
            distribution_result = self._run_module(
                "Distribution",
                "  Running distribution diagnostics...",
                analyze_distribution,
                data_array,
                alpha=self.config.alpha,
                compute_tails=self.config.compute_tails,
            )

        if self.config.verbose:
            logger.info("  Diagnostics complete")

        # Aggregate module outputs into the composite result object.
        result = FeatureDiagnosticsResult(
            feature_name=name,
            n_obs=n_obs,
            stationarity=stationarity_result,
            autocorrelation=autocorrelation_result,
            volatility=volatility_result,
            distribution=distribution_result,
        )

        return result

    def run_batch_diagnostics(
        self,
        data: pd.DataFrame,
        feature_names: list[str] | None = None,
    ) -> dict[str, FeatureDiagnosticsResult]:
        """Run diagnostics on multiple features in batch.

        Args:
            data: DataFrame with features as columns
            feature_names: Column names to analyze. If None, analyzes all columns.

        Returns:
            Dictionary mapping feature name to FeatureDiagnosticsResult

        Raises:
            ValidationError: If data is not a pd.DataFrame

        Example:
            >>> import pandas as pd
            >>> import numpy as np
            >>>
            >>> # Create multi-feature DataFrame
            >>> df = pd.DataFrame({
            ...     'momentum': np.random.randn(1000),
            ...     'mean_reversion': np.random.randn(1000),
            ...     'volatility': np.abs(np.random.randn(1000))
            ... })
            >>>
            >>> diagnostics = FeatureDiagnostics()
            >>> results = diagnostics.run_batch_diagnostics(df)
            >>>
            >>> for name, result in results.items():
            ...     print(f"\n{name}:")
            ...     print(f"  Health: {result.health_score:.2f}")
            ...     print(f"  Flags: {result.flags}")
        """
        if not isinstance(data, pd.DataFrame):
            raise ValidationError(
                f"Data must be pd.DataFrame for batch processing, got {type(data).__name__}"
            )

        if feature_names is None:
            feature_names = list(data.columns)

        if self.config.verbose:
            logger.info(f"Running batch diagnostics on {len(feature_names)} features")

        results = {}
        for name in feature_names:
            # Unknown columns are skipped with a warning rather than failing
            # the whole batch.
            if name not in data.columns:
                logger.warning(f"Feature '{name}' not found in DataFrame, skipping")
                continue

            if self.config.verbose:
                logger.info(f"\nProcessing feature: {name}")

            results[name] = self.run_diagnostics(data[name], name=name)

        return results